From 5b336ac46f60606cdcf77889d624ce15cdd62530 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 19 Oct 2015 15:28:19 +0200 Subject: Implement package search by terms --- brep/buildfile | 2 +- brep/package | 84 ++++++++++++++++++++++++-------- brep/package-extra.sql | 103 ++++++++++++++++++++++++++++++++++++++++ brep/package-search.cxx | 58 +++++++++++++--------- brep/package-traits | 38 +++++++++++++++ brep/package-traits.cxx | 70 +++++++++++++++++++++++++++ brep/package-version-search.cxx | 87 ++++++++++++++++----------------- brep/package.cxx | 42 ++++++++++++++++ brep/page | 14 ++++++ brep/page.cxx | 11 +++++ 10 files changed, 419 insertions(+), 90 deletions(-) create mode 100644 brep/package-extra.sql create mode 100644 brep/package-traits create mode 100644 brep/package-traits.cxx (limited to 'brep') diff --git a/brep/buildfile b/brep/buildfile index c903b37..79dadc2 100644 --- a/brep/buildfile +++ b/brep/buildfile @@ -12,7 +12,7 @@ import libs += libodb%lib{odb} import libs += libodb-pgsql%lib{odb-pgsql} import libs += libbpkg%lib{bpkg} -brep = cxx{package package-odb} +brep = cxx{package package-traits package-odb} libso{brep}: $brep $libs libso{brep}: cxx.export.poptions = -I$out_root -I$src_root diff --git a/brep/package b/brep/package index aaba0ac..7713288 100644 --- a/brep/package +++ b/brep/package @@ -29,7 +29,6 @@ namespace brep // Use an image type to map bpkg::version to the database since there // is no way to modify individual components directly. // - #pragma db value struct _version { @@ -300,6 +299,26 @@ namespace brep } }; + // The 'to' expression calls the PostgreSQL to_tsvector(weighted_text) + // function overload (package-extra.sql). Since we are only interested + // in "write-only" members of this type, make the 'from' expression + // always return empty string (we still have to work the placeholder + // in to keep overprotective ODB happy). + // + #pragma db map type("tsvector") as("TEXT") \ + to("to_tsvector((?)::weighted_text)") from("COALESCE('',(?))") + + // C++ type for weighted PostgreSQL tsvector. + // + #pragma db value type("tsvector") + struct weighted_text + { + std::string a; + std::string b; + std::string c; + std::string d; + }; + #pragma db object pointer(std::shared_ptr) session class package { @@ -429,51 +448,74 @@ namespace brep set(odb::nested_set (this.requirements, move (?))) \ id_column("") key_column("") value_column("id") + // external_repositories + // #pragma db member(external_repositories) \ id_column("") value_column("repository") value_not_null + // search_index + // + #pragma db member(search_index) virtual(weighted_text) null \ + access(search_text) + + #pragma db index method("GIN") member(search_index) + private: friend class odb::access; package () = default; + + // Save keywords, summary, description, and changes to weighted_text + // a, b, c, d members, respectively. So a word found in keywords will + // have a higher weight than if it's found in the summary. + // + weighted_text + search_text () const; + + // Noop as search_index is a write-only member. + // + void + search_text (const weighted_text&) {} }; - // Find an internal package of the latest version. + // Package search query matching rank. // - #pragma db view object(package) \ - object(package = p: package::id.name == p::id.name && \ - package::id.version < p::id.version) \ - query((package::internal_repository.is_not_null () && \ - p::id.name.is_null ()) + "AND" + (?)) - struct latest_internal_package + #pragma db view query("/*CALL*/ SELECT * FROM search_latest_packages(?)") + struct latest_package_search_rank { - using package_type = brep::package; - std::shared_ptr package; - - operator const std::shared_ptr& () const {return package;} - explicit operator package_type& () const {return *package;} + package_id id; + double rank; }; - // Count number of internal packages distinct names. - // - #pragma db view object(package) \ - query(package::internal_repository.is_not_null () && (?)) - struct internal_package_name_count + #pragma db view \ + query("/*CALL*/ SELECT count(*) FROM search_latest_packages(?)") + struct latest_package_count { - #pragma db column("count(DISTINCT" + package::id.name + ")") std::size_t result; operator std::size_t () const {return result;} }; - #pragma db view object(package) + #pragma db view query("/*CALL*/ SELECT * FROM search_packages(?)") + struct package_search_rank + { + package_id id; + double rank; + }; + + #pragma db view query("/*CALL*/ SELECT count(*) FROM search_packages(?)") struct package_count { - #pragma db column("count(*)") std::size_t result; operator std::size_t () const {return result;} }; + #pragma db view query("/*CALL*/ SELECT * FROM latest_package(?)") + struct latest_package + { + package_id id; + }; + // Version comparison operators. // // They allow comparing objects that have epoch, canonical_upstream diff --git a/brep/package-extra.sql b/brep/package-extra.sql new file mode 100644 index 0000000..d30a48e --- /dev/null +++ b/brep/package-extra.sql @@ -0,0 +1,103 @@ +DROP TYPE IF EXISTS weighted_text CASCADE; +CREATE TYPE weighted_text AS (a TEXT, b TEXT, c TEXT, d TEXT); + +DROP FUNCTION IF EXISTS to_tsvector(IN document weighted_text); +DROP FUNCTION IF EXISTS search_packages(IN query tsquery, INOUT name TEXT); +DROP FUNCTION IF EXISTS search_latest_packages(IN query tsquery); +DROP FUNCTION IF EXISTS latest_package(INOUT name TEXT); +DROP FUNCTION IF EXISTS latest_packages(); + +-- Return the latest versions of internal packages as a set of package rows. +-- +CREATE FUNCTION +latest_packages() +RETURNS SETOF package AS $$ + SELECT p1.* + FROM package p1 LEFT JOIN package p2 ON ( + p1.name = p2.name AND + (p1.version_epoch < p2.version_epoch OR + p1.version_epoch = p2.version_epoch AND + (p1.version_canonical_upstream < p2.version_canonical_upstream OR + p1.version_canonical_upstream = p2.version_canonical_upstream AND + p1.version_revision < p2.version_revision))) + WHERE + p1.internal_repository IS NOT NULL AND p2.name IS NULL; +$$ LANGUAGE SQL STABLE; + +-- Find the latest version of an internal package having the specified name. +-- Return a single row containing the package id, empty row set if the package +-- not found. +-- +CREATE FUNCTION +latest_package(INOUT name TEXT, + OUT version_epoch SMALLINT, + OUT version_canonical_upstream TEXT, + OUT version_revision SMALLINT) +RETURNS SETOF record AS $$ + SELECT name, version_epoch, version_canonical_upstream, version_revision + FROM latest_packages() + WHERE name = latest_package.name; +$$ LANGUAGE SQL STABLE; + +-- Search for the latest version of an internal packages matching the specified +-- search query. Return a set of rows containing the package id and search +-- rank. If query is NULL, then match all packages and return 0 rank for +-- all rows. +-- +CREATE FUNCTION +search_latest_packages(IN query tsquery, + OUT name TEXT, + OUT version_epoch SMALLINT, + OUT version_canonical_upstream TEXT, + OUT version_revision SMALLINT, + OUT rank real) +RETURNS SETOF record AS $$ + SELECT name, version_epoch, version_canonical_upstream, version_revision, + CASE + WHEN query IS NULL THEN 0 +-- Weight mapping: D C B A + ELSE ts_rank_cd('{0.05, 0.2, 0.9, 1.0}', search_index, query) + END AS rank + FROM latest_packages() + WHERE query IS NULL OR search_index @@ query; +$$ LANGUAGE SQL STABLE; + +-- Search for packages matching the search query and having the specified name. +-- Return a set of rows containing the package id and search rank. If query +-- is NULL, then match all packages and return 0 rank for all rows. +-- +CREATE FUNCTION +search_packages(IN query tsquery, + INOUT name TEXT, + OUT version_epoch SMALLINT, + OUT version_canonical_upstream TEXT, + OUT version_revision SMALLINT, + OUT rank real) +RETURNS SETOF record AS $$ + SELECT name, version_epoch, version_canonical_upstream, version_revision, + CASE + WHEN query IS NULL THEN 0 +-- Weight mapping: D C B A + ELSE ts_rank_cd('{0.05, 0.2, 0.9, 1.0}', search_index, query) + END AS rank + FROM package + WHERE + internal_repository IS NOT NULL AND name = search_packages.name AND + (query IS NULL OR search_index @@ query); +$$ LANGUAGE SQL STABLE; + +-- Parse weighted_text to tsvector. +-- +CREATE FUNCTION +to_tsvector(IN document weighted_text) +RETURNS tsvector AS $$ + SELECT + CASE + WHEN document IS NULL THEN NULL + ELSE + setweight(to_tsvector(document.a), 'A') || + setweight(to_tsvector(document.b), 'B') || + setweight(to_tsvector(document.c), 'C') || + setweight(to_tsvector(document.d), 'D') + END +$$ LANGUAGE SQL IMMUTABLE; diff --git a/brep/package-search.cxx b/brep/package-search.cxx index ebc5055..ffe5cb9 100644 --- a/brep/package-search.cxx +++ b/brep/package-search.cxx @@ -40,6 +40,18 @@ namespace brep db_ = shared_database (options_->db_host (), options_->db_port ()); } + template + static inline query + search_param (const string& q) + { + using query = query; + return "(" + + (q.empty () + ? query ("NULL") + : "plainto_tsquery (" + query::_val (q) + ")") + + ")"; + } + void package_search:: handle (request& rq, response& rs) { @@ -77,62 +89,62 @@ namespace brep << "#packages {font-size: x-large;}" << ident << ".package {margin: 0.5em 0 0;}" << ident << ".name {font-size: x-large;}" << ident - << ".tags {margin: 0.3em 0 0;}" + << ".tags {margin: 0.3em 0 0;}" << ident + << "form {margin: 0.5em 0 0 0;}" << ~CSS_STYLE << ~HEAD << BODY; - string q (pr.query ().empty () ? "" : "q=" + mime_url_encode (pr.query ())); + const string& sq (pr.query ()); // Search query. + string qp (sq.empty () ? "" : "q=" + mime_url_encode (sq)); size_t rop (options_->results_on_page ()); transaction t (db_->begin ()); - // @@ Query will include search criteria if specified. - // - size_t pc (db_->query_value ()); - - s << DIV(ID="packages") << "Packages (" << pc << ")" << ~DIV; + size_t pc ( + db_->query_value ( + search_param (sq))); - // @@ Query will also include search criteria if specified. - // - using query = query; + s << DIV(ID="packages") << "Packages (" << pc << ")" << ~DIV + << FORM_SEARCH (sq); auto r ( - db_->query (query (true) + - "ORDER BY" + query::package::id.name + + db_->query ( + search_param (sq) + + "ORDER BY rank DESC, name" + "OFFSET" + to_string (pr.page () * rop) + "LIMIT" + to_string (rop))); - for (const auto& ip: r) + for (const auto& pr: r) { - const package& p (ip); + shared_ptr p (db_->load (pr.id)); s << DIV(CLASS="package") << DIV(CLASS="name") << A - << HREF << "/go/" << mime_url_encode (p.id.name); + << HREF << "/go/" << mime_url_encode (p->id.name); // Propagate search criteria to the package version search url. // - if (!q.empty ()) - s << "?" << q; + if (!qp.empty ()) + s << "?" << qp; s << ~HREF - << p.id.name + << p->id.name << ~A << ~DIV - << DIV(CLASS="summary") << p.summary << ~DIV - << DIV_TAGS (p.tags) - << DIV_LICENSES (p.license_alternatives) + << DIV(CLASS="summary") << p->summary << ~DIV + << DIV_TAGS (p->tags) + << DIV_LICENSES (p->license_alternatives) << DIV(CLASS="dependencies") - << "Dependencies: " << p.dependencies.size () + << "Dependencies: " << p->dependencies.size () << ~DIV << ~DIV; } t.commit (); - string u (q.empty () ? "/" : ("/?" + q)); + string u (qp.empty () ? "/" : ("/?" + qp)); s << DIV_PAGER (pr.page (), pc, rop, options_->pages_in_pager (), u) << ~BODY diff --git a/brep/package-traits b/brep/package-traits new file mode 100644 index 0000000..21ec0b7 --- /dev/null +++ b/brep/package-traits @@ -0,0 +1,38 @@ +// file : brep/package-traits -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BREP_PACKAGE_TRAITS +#define BREP_PACKAGE_TRAITS + +#include // size_t + +#include + +#include // weighted_text + +namespace odb +{ + namespace pgsql + { + template <> + class value_traits + { + public: + typedef brep::weighted_text value_type; + typedef value_type query_type; + typedef details::buffer image_type; + + static void + set_value (value_type&, const details::buffer&, std::size_t, bool) {} + + static void + set_image (details::buffer&, + std::size_t& n, + bool& is_null, + const value_type&); + }; + } +} + +#endif // BREP_PACKAGE_TRAITS diff --git a/brep/package-traits.cxx b/brep/package-traits.cxx new file mode 100644 index 0000000..cd30d81 --- /dev/null +++ b/brep/package-traits.cxx @@ -0,0 +1,70 @@ +// file : brep/package-traits.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include +#include +#include // memcpy + +#include + +using namespace std; + +namespace odb +{ + namespace pgsql + { + static inline void + to_pg_string (ostream& os, const string& s) + { + os << '"'; + + for (auto c: s) + { + if (c == '\\' || c == '"') + os << '\\'; + + os << c; + } + + os << '"'; + } + + // Convert C++ weighted_text struct to PostgreSQL weighted_text + // composite type. + // + void value_traits:: + set_image (details::buffer& b, + size_t& n, + bool& is_null, + const value_type& v) + { + is_null = v.a.empty () && v.b.empty () && v.c.empty () && v.d.empty (); + + if (!is_null) + { + ostringstream o; + o << "("; + to_pg_string (o, v.a); + o << ","; + to_pg_string (o, v.b); + o << ","; + to_pg_string (o, v.c); + o << ","; + to_pg_string (o, v.d); + o << ")"; + + const string& s (o.str ()); + n = s.size (); + + if (n > b.capacity ()) + b.capacity (n); + + memcpy (b.data (), s.c_str (), n); + } + } + } +} diff --git a/brep/package-version-search.cxx b/brep/package-version-search.cxx index 933aaa8..84c06e1 100644 --- a/brep/package-version-search.cxx +++ b/brep/package-version-search.cxx @@ -40,6 +40,21 @@ namespace brep db_ = shared_database (options_->db_host (), options_->db_port ()); } + template + static inline query + search_params (const string& n, const string& q) + { + using query = query; + + return "(" + + (q.empty () + ? query ("NULL") + : "plainto_tsquery (" + query::_val (q) + ")") + + "," + + query::_val (n) + + ")"; + } + void package_version_search:: handle (request& rq, response& rs) { @@ -82,28 +97,29 @@ namespace brep << "#versions {font-size: x-large; margin: 0.5em 0 0;}" << ident << ".package_version {margin: 0.5em 0 0;}" << ident << ".version {font-size: x-large;}" << ident - << ".priority {margin: 0.3em 0 0;}" + << ".priority {margin: 0.3em 0 0;}" << ident + << "form {margin: 0.5em 0 0 0;}" << ~CSS_STYLE << ~HEAD << BODY << DIV(ID="name") << name << ~DIV; + const string& sq (pr.query ()); // Search query. size_t rop (options_->results_on_page ()); transaction t (db_->begin ()); shared_ptr p; { - using query = query; - - latest_internal_package ip; - if (!db_->query_one ( - query::package::id.name == name, ip)) + latest_package lp; + if (!db_->query_one ( + query( + "(" + query::_val (name) + ")"), lp)) { throw invalid_request (404, "Package '" + name + "' not found"); } - p = ip; + p = db_->load (lp.id); } s << DIV(ID="summary") << p->summary << ~DIV @@ -115,56 +131,37 @@ namespace brep s << DIV_TAGS (p->tags); - size_t pvc; - { - using query = query; - - // @@ Query will also include search criteria if specified. - // - pvc = db_->query_value ( - query::id.name == name && query::internal_repository.is_not_null ()); - } - - s << DIV(ID="versions") << "Versions (" << pvc << ")" << ~DIV; - - // @@ Need to find some better place for package url and email or drop them - // from this page totally. - // -/* - if (p->package_url) - s << DIV_URL (*p->package_url); + size_t pvc ( + db_->query_value ( + search_params (name, sq))); - if (p->package_email) - s << DIV_EMAIL (*p->package_email); -*/ + s << DIV(ID="versions") << "Versions (" << pvc << ")" << ~DIV + << FORM_SEARCH (sq); - // @@ Use appropriate view when clarify which package info to be displayed - // and search index structure get implemented. Query will also include - // search criteria if specified. - // - using query = query; auto r ( - db_->query ( - (query::id.name == name && query::internal_repository.is_not_null ()) + - order_by_version_desc (query::id.version) + + db_->query ( + search_params (name, sq) + + "ORDER BY rank DESC, version_epoch DESC, " + "version_canonical_upstream DESC, version_revision DESC" + "OFFSET" + to_string (pr.page () * rop) + "LIMIT" + to_string (rop))); - for (const auto& v: r) + for (const auto& pr: r) { - const string& vs (v.version.string ()); + shared_ptr p (db_->load (pr.id)); + const string& v (p->version.string ()); s << DIV(CLASS="package_version") << DIV(CLASS="version") << A - << HREF << "/go/" << mime_url_encode (name) << "/" << vs << ~HREF - << vs + << HREF << "/go/" << mime_url_encode (name) << "/" << v << ~HREF + << v << ~A << ~DIV - << DIV_PRIORITY (v.priority) - << DIV_LICENSES (v.license_alternatives) + << DIV_PRIORITY (p->priority) + << DIV_LICENSES (p->license_alternatives) << DIV(CLASS="dependencies") - << "Dependencies: " << v.dependencies.size () + << "Dependencies: " << p->dependencies.size () << ~DIV << ~DIV; } @@ -172,8 +169,8 @@ namespace brep t.commit (); string u (mime_url_encode (name)); - if (!pr.query ().empty ()) - u += "?q=" + mime_url_encode (pr.query ()); + if (!sq.empty ()) + u += "?q=" + mime_url_encode (sq); s << DIV_PAGER (pr.page (), pvc, rop, options_->pages_in_pager (), u) << ~BODY diff --git a/brep/package.cxx b/brep/package.cxx index fd40bc6..9b7d24a 100644 --- a/brep/package.cxx +++ b/brep/package.cxx @@ -66,6 +66,48 @@ namespace brep external_repositories.emplace_back (move (rp)); } + weighted_text package:: + search_text () const + { + if (internal_repository == nullptr) + return weighted_text (); + + // Derive keywords from the basic package information: name, + // version. + // + //@@ What about 'stable' from cppget.org/stable? Add path of + // the repository to keywords? Or is it too "polluting" and + // we will handle it in some other way (e.g., by allowing + // the user to specify repo location in the drop-down box)? + // Probably drop-box would be better as also tells what are + // the available internal repositories. + // + string k (id.name + " " + version.string () + " " + version.string (true)); + + // Add tags to keywords. + // + for (const auto& t: tags) + k += " " + t; + + // Add licenses to keywords. + // + for (const auto& la: license_alternatives) + { + for (const auto& l: la) + { + k += " " + l; + + // If license is say LGPLv2 then LGPL is also a keyword. + // + size_t n (l.size ()); + if (n > 2 && l[n - 2] == 'v' && l[n - 1] >= '0' && l[n - 1] <= '9') + k += " " + string (l, 0, n - 2); + } + } + + return {move (k), summary, description ? *description : "", changes}; + } + // repository // repository:: diff --git a/brep/page b/brep/page index d14bb41..b923c7f 100644 --- a/brep/page +++ b/brep/page @@ -124,6 +124,20 @@ namespace brep private: const priority& priority_; }; + + // Generates package search element. + // + class FORM_SEARCH + { + public: + FORM_SEARCH (const std::string& q): query_ (q) {} + + void + operator() (xml::serializer& s) const; + + private: + const std::string& query_; + }; } #endif // BREP_PAGE diff --git a/brep/page.cxx b/brep/page.cxx index b59cd4b..c68a7eb 100644 --- a/brep/page.cxx +++ b/brep/page.cxx @@ -206,4 +206,15 @@ namespace brep << "Priority: " << priority_names[priority_] << ~DIV; } + + // FORM_SEARCH + // + void FORM_SEARCH:: + operator() (serializer& s) const + { + s << FORM + << *INPUT(TYPE="search", NAME="q", VALUE=query_) + << *INPUT(TYPE="submit", VALUE="Search") + << ~FORM; + } } -- cgit v1.1