aboutsummaryrefslogtreecommitdiff
path: root/brep
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2015-10-19 15:28:19 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2015-10-28 01:53:51 +0200
commit5b336ac46f60606cdcf77889d624ce15cdd62530 (patch)
tree469c0dd598b072d13b9a27f458c96c8353745638 /brep
parent3e37999a5f9efd4caf44c40985b3e1254660a625 (diff)
Implement package search by terms
Diffstat (limited to 'brep')
-rw-r--r--brep/buildfile2
-rw-r--r--brep/package84
-rw-r--r--brep/package-extra.sql103
-rw-r--r--brep/package-search.cxx58
-rw-r--r--brep/package-traits38
-rw-r--r--brep/package-traits.cxx70
-rw-r--r--brep/package-version-search.cxx87
-rw-r--r--brep/package.cxx42
-rw-r--r--brep/page14
-rw-r--r--brep/page.cxx11
10 files changed, 419 insertions, 90 deletions
diff --git a/brep/buildfile b/brep/buildfile
index c903b37..79dadc2 100644
--- a/brep/buildfile
+++ b/brep/buildfile
@@ -12,7 +12,7 @@ import libs += libodb%lib{odb}
import libs += libodb-pgsql%lib{odb-pgsql}
import libs += libbpkg%lib{bpkg}
-brep = cxx{package package-odb}
+brep = cxx{package package-traits package-odb}
libso{brep}: $brep $libs
libso{brep}: cxx.export.poptions = -I$out_root -I$src_root
diff --git a/brep/package b/brep/package
index aaba0ac..7713288 100644
--- a/brep/package
+++ b/brep/package
@@ -29,7 +29,6 @@ namespace brep
// Use an image type to map bpkg::version to the database since there
// is no way to modify individual components directly.
//
-
#pragma db value
struct _version
{
@@ -300,6 +299,26 @@ namespace brep
}
};
+ // The 'to' expression calls the PostgreSQL to_tsvector(weighted_text)
+ // function overload (package-extra.sql). Since we are only interested
+ // in "write-only" members of this type, make the 'from' expression
+ // always return empty string (we still have to work the placeholder
+ // in to keep overprotective ODB happy).
+ //
+ #pragma db map type("tsvector") as("TEXT") \
+ to("to_tsvector((?)::weighted_text)") from("COALESCE('',(?))")
+
+  // C++ image of PostgreSQL weighted_text; a-d get tsvector weights A-D.
+  //
+  #pragma db value type("tsvector")
+  struct weighted_text
+  {
+    std::string a; // Weight A text; package::search_text() stores keywords.
+    std::string b; // Weight B text; package::search_text() stores summary.
+    std::string c; // Weight C text; package::search_text() stores description.
+    std::string d; // Weight D text; package::search_text() stores changes.
+  };
+
#pragma db object pointer(std::shared_ptr) session
class package
{
@@ -429,51 +448,74 @@ namespace brep
set(odb::nested_set (this.requirements, move (?))) \
id_column("") key_column("") value_column("id")
+ // external_repositories
+ //
#pragma db member(external_repositories) \
id_column("") value_column("repository") value_not_null
+ // search_index
+ //
+ #pragma db member(search_index) virtual(weighted_text) null \
+ access(search_text)
+
+ #pragma db index method("GIN") member(search_index)
+
private:
friend class odb::access;
package () = default;
+
+ // Save keywords, summary, description, and changes to weighted_text
+ // a, b, c, d members, respectively. So a word found in keywords will
+ // have a higher weight than if it's found in the summary.
+ //
+ weighted_text
+ search_text () const;
+
+ // Noop as search_index is a write-only member.
+ //
+ void
+ search_text (const weighted_text&) {}
};
- // Find an internal package of the latest version.
+ // Package search query matching rank.
//
- #pragma db view object(package) \
- object(package = p: package::id.name == p::id.name && \
- package::id.version < p::id.version) \
- query((package::internal_repository.is_not_null () && \
- p::id.name.is_null ()) + "AND" + (?))
- struct latest_internal_package
+ #pragma db view query("/*CALL*/ SELECT * FROM search_latest_packages(?)")
+ struct latest_package_search_rank
{
- using package_type = brep::package;
- std::shared_ptr<package_type> package;
-
- operator const std::shared_ptr<package_type>& () const {return package;}
- explicit operator package_type& () const {return *package;}
+ package_id id;
+ double rank;
};
- // Count number of internal packages distinct names.
- //
- #pragma db view object(package) \
- query(package::internal_repository.is_not_null () && (?))
- struct internal_package_name_count
+ #pragma db view \
+ query("/*CALL*/ SELECT count(*) FROM search_latest_packages(?)")
+ struct latest_package_count
{
- #pragma db column("count(DISTINCT" + package::id.name + ")")
std::size_t result;
operator std::size_t () const {return result;}
};
- #pragma db view object(package)
+ #pragma db view query("/*CALL*/ SELECT * FROM search_packages(?)")
+ struct package_search_rank
+ {
+ package_id id;
+ double rank;
+ };
+
+ #pragma db view query("/*CALL*/ SELECT count(*) FROM search_packages(?)")
struct package_count
{
- #pragma db column("count(*)")
std::size_t result;
operator std::size_t () const {return result;}
};
+ #pragma db view query("/*CALL*/ SELECT * FROM latest_package(?)")
+ struct latest_package
+ {
+ package_id id;
+ };
+
// Version comparison operators.
//
// They allow comparing objects that have epoch, canonical_upstream
diff --git a/brep/package-extra.sql b/brep/package-extra.sql
new file mode 100644
index 0000000..d30a48e
--- /dev/null
+++ b/brep/package-extra.sql
@@ -0,0 +1,103 @@
+DROP TYPE IF EXISTS weighted_text CASCADE;
+CREATE TYPE weighted_text AS (a TEXT, b TEXT, c TEXT, d TEXT);
+
+DROP FUNCTION IF EXISTS to_tsvector(IN document weighted_text);
+DROP FUNCTION IF EXISTS search_packages(IN query tsquery, INOUT name TEXT);
+DROP FUNCTION IF EXISTS search_latest_packages(IN query tsquery);
+DROP FUNCTION IF EXISTS latest_package(INOUT name TEXT);
+DROP FUNCTION IF EXISTS latest_packages();
+
+-- Return the latest versions of internal packages as a set of package rows.
+--
+-- A row is the latest if there is no other internal package row with the
+-- same name and a greater version (epoch, canonical upstream, and revision
+-- compared in this order). Both sides of the comparison are restricted to
+-- internal packages so that a newer row with NULL internal_repository does
+-- not hide the internal package from the result.
+--
+CREATE FUNCTION
+latest_packages()
+RETURNS SETOF package AS $$
+  SELECT p1.*
+  FROM package p1 LEFT JOIN package p2 ON (
+    p1.name = p2.name AND
+    p2.internal_repository IS NOT NULL AND
+    (p1.version_epoch < p2.version_epoch OR
+     p1.version_epoch = p2.version_epoch AND
+     (p1.version_canonical_upstream < p2.version_canonical_upstream OR
+      p1.version_canonical_upstream = p2.version_canonical_upstream AND
+      p1.version_revision < p2.version_revision)))
+  WHERE
+    p1.internal_repository IS NOT NULL AND p2.name IS NULL;
+$$ LANGUAGE SQL STABLE;
+
+-- Look up the latest version of the internal package with the specified
+-- name. The result is a single row containing the package id (name and
+-- version components) or an empty row set if the package is not found.
+--
+CREATE FUNCTION
+latest_package(INOUT name TEXT,
+               OUT version_epoch SMALLINT,
+               OUT version_canonical_upstream TEXT,
+               OUT version_revision SMALLINT)
+RETURNS SETOF record AS $$
+  SELECT
+    lp.name,
+    lp.version_epoch,
+    lp.version_canonical_upstream,
+    lp.version_revision
+  FROM latest_packages() AS lp
+  WHERE lp.name = latest_package.name;
+$$ LANGUAGE SQL STABLE;
+
+-- Search the latest versions of internal packages for those matching the
+-- specified search query. Each result row contains the package id and the
+-- search rank. A NULL query matches all the packages, in which case the
+-- rank is 0 for every row.
+--
+CREATE FUNCTION
+search_latest_packages(IN query tsquery,
+                       OUT name TEXT,
+                       OUT version_epoch SMALLINT,
+                       OUT version_canonical_upstream TEXT,
+                       OUT version_revision SMALLINT,
+                       OUT rank real)
+RETURNS SETOF record AS $$
+  SELECT
+    lp.name,
+    lp.version_epoch,
+    lp.version_canonical_upstream,
+    lp.version_revision,
+    CASE
+      WHEN query IS NULL THEN 0
+--    Weight mapping:       D     C    B    A
+      ELSE ts_rank_cd('{0.05, 0.2, 0.9, 1.0}', lp.search_index, query)
+    END AS rank
+  FROM latest_packages() AS lp
+  WHERE query IS NULL OR lp.search_index @@ query;
+$$ LANGUAGE SQL STABLE;
+
+-- Search the internal packages with the specified name for those matching
+-- the search query. Each result row contains the package id and the search
+-- rank. A NULL query matches all such packages, in which case the rank is
+-- 0 for every row.
+--
+CREATE FUNCTION
+search_packages(IN query tsquery,
+                INOUT name TEXT,
+                OUT version_epoch SMALLINT,
+                OUT version_canonical_upstream TEXT,
+                OUT version_revision SMALLINT,
+                OUT rank real)
+RETURNS SETOF record AS $$
+  SELECT
+    p.name,
+    p.version_epoch,
+    p.version_canonical_upstream,
+    p.version_revision,
+    CASE
+      WHEN query IS NULL THEN 0
+--    Weight mapping:       D     C    B    A
+      ELSE ts_rank_cd('{0.05, 0.2, 0.9, 1.0}', p.search_index, query)
+    END AS rank
+  FROM package AS p
+  WHERE
+    p.internal_repository IS NOT NULL AND
+    p.name = search_packages.name AND
+    (query IS NULL OR p.search_index @@ query);
+$$ LANGUAGE SQL STABLE;
+
+-- Parse weighted_text to tsvector, assigning weights A, B, C, and D to the
+-- lexemes of the a, b, c, and d fields, respectively. Return NULL if the
+-- document is NULL. A NULL field of a non-NULL document is treated as empty
+-- text since concatenating a NULL tsvector would otherwise turn the whole
+-- result into NULL.
+--
+-- NOTE(review): the single-argument to_tsvector(text) depends on the
+-- default_text_search_config setting, so IMMUTABLE looks too strong for
+-- this overload; confirm whether it should be STABLE instead.
+--
+CREATE FUNCTION
+to_tsvector(IN document weighted_text)
+RETURNS tsvector AS $$
+  SELECT
+    CASE
+      WHEN document IS NULL THEN NULL
+      ELSE
+        setweight(to_tsvector(COALESCE(document.a, '')), 'A') ||
+        setweight(to_tsvector(COALESCE(document.b, '')), 'B') ||
+        setweight(to_tsvector(COALESCE(document.c, '')), 'C') ||
+        setweight(to_tsvector(COALESCE(document.d, '')), 'D')
+    END
+$$ LANGUAGE SQL IMMUTABLE;
diff --git a/brep/package-search.cxx b/brep/package-search.cxx
index ebc5055..ffe5cb9 100644
--- a/brep/package-search.cxx
+++ b/brep/package-search.cxx
@@ -40,6 +40,18 @@ namespace brep
db_ = shared_database (options_->db_host (), options_->db_port ());
}
+  // Build the parenthesized argument for the views that call the
+  // search_latest_packages() SQL function: NULL if the search query is
+  // empty and plainto_tsquery() of the bound query string otherwise.
+  //
+  template <typename T>
+  static inline query<T>
+  search_param (const string& q)
+  {
+    using query = query<T>;
+
+    if (q.empty ())
+      return "(" + query ("NULL") + ")";
+
+    return "(" + ("plainto_tsquery (" + query::_val (q) + ")") + ")";
+  }
+
void package_search::
handle (request& rq, response& rs)
{
@@ -77,62 +89,62 @@ namespace brep
<< "#packages {font-size: x-large;}" << ident
<< ".package {margin: 0.5em 0 0;}" << ident
<< ".name {font-size: x-large;}" << ident
- << ".tags {margin: 0.3em 0 0;}"
+ << ".tags {margin: 0.3em 0 0;}" << ident
+ << "form {margin: 0.5em 0 0 0;}"
<< ~CSS_STYLE
<< ~HEAD
<< BODY;
- string q (pr.query ().empty () ? "" : "q=" + mime_url_encode (pr.query ()));
+ const string& sq (pr.query ()); // Search query.
+ string qp (sq.empty () ? "" : "q=" + mime_url_encode (sq));
size_t rop (options_->results_on_page ());
transaction t (db_->begin ());
- // @@ Query will include search criteria if specified.
- //
- size_t pc (db_->query_value<internal_package_name_count> ());
-
- s << DIV(ID="packages") << "Packages (" << pc << ")" << ~DIV;
+ size_t pc (
+ db_->query_value<latest_package_count> (
+ search_param<latest_package_count> (sq)));
- // @@ Query will also include search criteria if specified.
- //
- using query = query<latest_internal_package>;
+ s << DIV(ID="packages") << "Packages (" << pc << ")" << ~DIV
+ << FORM_SEARCH (sq);
auto r (
- db_->query<latest_internal_package> (query (true) +
- "ORDER BY" + query::package::id.name +
+ db_->query<latest_package_search_rank> (
+ search_param<latest_package_search_rank> (sq) +
+ "ORDER BY rank DESC, name" +
"OFFSET" + to_string (pr.page () * rop) +
"LIMIT" + to_string (rop)));
- for (const auto& ip: r)
+ for (const auto& pr: r)
{
- const package& p (ip);
+ shared_ptr<package> p (db_->load<package> (pr.id));
s << DIV(CLASS="package")
<< DIV(CLASS="name")
<< A
- << HREF << "/go/" << mime_url_encode (p.id.name);
+ << HREF << "/go/" << mime_url_encode (p->id.name);
// Propagate search criteria to the package version search url.
//
- if (!q.empty ())
- s << "?" << q;
+ if (!qp.empty ())
+ s << "?" << qp;
s << ~HREF
- << p.id.name
+ << p->id.name
<< ~A
<< ~DIV
- << DIV(CLASS="summary") << p.summary << ~DIV
- << DIV_TAGS (p.tags)
- << DIV_LICENSES (p.license_alternatives)
+ << DIV(CLASS="summary") << p->summary << ~DIV
+ << DIV_TAGS (p->tags)
+ << DIV_LICENSES (p->license_alternatives)
<< DIV(CLASS="dependencies")
- << "Dependencies: " << p.dependencies.size ()
+ << "Dependencies: " << p->dependencies.size ()
<< ~DIV
<< ~DIV;
}
t.commit ();
- string u (q.empty () ? "/" : ("/?" + q));
+ string u (qp.empty () ? "/" : ("/?" + qp));
s << DIV_PAGER (pr.page (), pc, rop, options_->pages_in_pager (), u)
<< ~BODY
diff --git a/brep/package-traits b/brep/package-traits
new file mode 100644
index 0000000..21ec0b7
--- /dev/null
+++ b/brep/package-traits
@@ -0,0 +1,38 @@
+// file : brep/package-traits -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BREP_PACKAGE_TRAITS
+#define BREP_PACKAGE_TRAITS
+
+#include <cstddef> // size_t
+
+#include <odb/pgsql/traits.hxx>
+
+#include <brep/package> // weighted_text
+
+namespace odb
+{
+ namespace pgsql
+ {
+    // Value traits that convert brep::weighted_text to its PostgreSQL
+    // image. Only the C++-to-image direction does any work.
+    //
+    template <>
+    class value_traits<brep::weighted_text, id_string>
+    {
+    public:
+      using value_type = brep::weighted_text;
+      using query_type = value_type;
+      using image_type = details::buffer;
+
+      // Noop: the image is never converted back to the C++ value.
+      //
+      static void
+      set_value (value_type&, const details::buffer&, std::size_t, bool)
+      {
+      }
+
+      // Defined in package-traits.cxx.
+      //
+      static void
+      set_image (details::buffer& image,
+                 std::size_t& n,
+                 bool& is_null,
+                 const value_type& value);
+    };
+ }
+}
+
+#endif // BREP_PACKAGE_TRAITS
diff --git a/brep/package-traits.cxx b/brep/package-traits.cxx
new file mode 100644
index 0000000..cd30d81
--- /dev/null
+++ b/brep/package-traits.cxx
@@ -0,0 +1,70 @@
+// file : brep/package-traits.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <brep/package-traits>
+
+#include <string>
+#include <ostream>
+#include <sstream>
+#include <cstring> // memcpy
+
+#include <odb/pgsql/traits.hxx>
+
+using namespace std;
+
+namespace odb
+{
+ namespace pgsql
+ {
+    // Write the string to the stream enclosed in double quotes, preceding
+    // every embedded backslash and double quote with a backslash.
+    //
+    static inline void
+    to_pg_string (ostream& os, const string& s)
+    {
+      const char quote ('"');
+      const char escape ('\\');
+
+      os << quote;
+
+      for (const char c: s)
+      {
+        const bool special (c == quote || c == escape);
+
+        if (special)
+          os << escape;
+
+        os << c;
+      }
+
+      os << quote;
+    }
+
+    // Serialize the C++ weighted_text value into the image buffer as the
+    // text ("a","b","c","d") with each member quoted by to_pg_string().
+    // A value with all four members empty is stored as NULL, in which case
+    // neither the buffer nor the size is touched.
+    //
+    void value_traits<brep::weighted_text, id_string>::
+    set_image (details::buffer& b,
+               size_t& n,
+               bool& is_null,
+               const value_type& v)
+    {
+      const string* const fields[] = {&v.a, &v.b, &v.c, &v.d};
+
+      is_null = true;
+      for (const string* f: fields)
+      {
+        if (!f->empty ())
+        {
+          is_null = false;
+          break;
+        }
+      }
+
+      if (is_null)
+        return;
+
+      ostringstream o;
+      const char* sep ("(");
+      for (const string* f: fields)
+      {
+        o << sep;
+        to_pg_string (o, *f);
+        sep = ",";
+      }
+      o << ")";
+
+      const string s (o.str ());
+      n = s.size ();
+
+      // Grow the buffer if the text does not fit.
+      //
+      if (b.capacity () < n)
+        b.capacity (n);
+
+      memcpy (b.data (), s.c_str (), n);
+    }
+ }
+}
diff --git a/brep/package-version-search.cxx b/brep/package-version-search.cxx
index 933aaa8..84c06e1 100644
--- a/brep/package-version-search.cxx
+++ b/brep/package-version-search.cxx
@@ -40,6 +40,21 @@ namespace brep
db_ = shared_database (options_->db_host (), options_->db_port ());
}
+  // Build the parenthesized argument list for the views that call the
+  // search_packages() SQL function. The first argument is NULL if the
+  // search query q is empty and plainto_tsquery() of the bound query
+  // string otherwise. The second argument is the bound package name n.
+  //
+  template <typename T>
+  static inline query<T>
+  search_params (const string& n, const string& q)
+  {
+    using query = query<T>;
+
+    if (q.empty ())
+      return "(" + query ("NULL") + "," + query::_val (n) + ")";
+
+    return "(" + ("plainto_tsquery (" + query::_val (q) + ")") + "," +
+      query::_val (n) + ")";
+  }
+
void package_version_search::
handle (request& rq, response& rs)
{
@@ -82,28 +97,29 @@ namespace brep
<< "#versions {font-size: x-large; margin: 0.5em 0 0;}" << ident
<< ".package_version {margin: 0.5em 0 0;}" << ident
<< ".version {font-size: x-large;}" << ident
- << ".priority {margin: 0.3em 0 0;}"
+ << ".priority {margin: 0.3em 0 0;}" << ident
+ << "form {margin: 0.5em 0 0 0;}"
<< ~CSS_STYLE
<< ~HEAD
<< BODY
<< DIV(ID="name") << name << ~DIV;
+ const string& sq (pr.query ()); // Search query.
size_t rop (options_->results_on_page ());
transaction t (db_->begin ());
shared_ptr<package> p;
{
- using query = query<latest_internal_package>;
-
- latest_internal_package ip;
- if (!db_->query_one<latest_internal_package> (
- query::package::id.name == name, ip))
+ latest_package lp;
+ if (!db_->query_one<latest_package> (
+ query<latest_package>(
+ "(" + query<latest_package>::_val (name) + ")"), lp))
{
throw invalid_request (404, "Package '" + name + "' not found");
}
- p = ip;
+ p = db_->load<package> (lp.id);
}
s << DIV(ID="summary") << p->summary << ~DIV
@@ -115,56 +131,37 @@ namespace brep
s << DIV_TAGS (p->tags);
- size_t pvc;
- {
- using query = query<package_count>;
-
- // @@ Query will also include search criteria if specified.
- //
- pvc = db_->query_value<package_count> (
- query::id.name == name && query::internal_repository.is_not_null ());
- }
-
- s << DIV(ID="versions") << "Versions (" << pvc << ")" << ~DIV;
-
- // @@ Need to find some better place for package url and email or drop them
- // from this page totally.
- //
-/*
- if (p->package_url)
- s << DIV_URL (*p->package_url);
+ size_t pvc (
+ db_->query_value<package_count> (
+ search_params<package_count> (name, sq)));
- if (p->package_email)
- s << DIV_EMAIL (*p->package_email);
-*/
+ s << DIV(ID="versions") << "Versions (" << pvc << ")" << ~DIV
+ << FORM_SEARCH (sq);
- // @@ Use appropriate view when clarify which package info to be displayed
- // and search index structure get implemented. Query will also include
- // search criteria if specified.
- //
- using query = query<package>;
auto r (
- db_->query<package> (
- (query::id.name == name && query::internal_repository.is_not_null ()) +
- order_by_version_desc (query::id.version) +
+ db_->query<package_search_rank> (
+ search_params<package_search_rank> (name, sq) +
+ "ORDER BY rank DESC, version_epoch DESC, "
+ "version_canonical_upstream DESC, version_revision DESC" +
"OFFSET" + to_string (pr.page () * rop) +
"LIMIT" + to_string (rop)));
- for (const auto& v: r)
+ for (const auto& pr: r)
{
- const string& vs (v.version.string ());
+ shared_ptr<package> p (db_->load<package> (pr.id));
+ const string& v (p->version.string ());
s << DIV(CLASS="package_version")
<< DIV(CLASS="version")
<< A
- << HREF << "/go/" << mime_url_encode (name) << "/" << vs << ~HREF
- << vs
+ << HREF << "/go/" << mime_url_encode (name) << "/" << v << ~HREF
+ << v
<< ~A
<< ~DIV
- << DIV_PRIORITY (v.priority)
- << DIV_LICENSES (v.license_alternatives)
+ << DIV_PRIORITY (p->priority)
+ << DIV_LICENSES (p->license_alternatives)
<< DIV(CLASS="dependencies")
- << "Dependencies: " << v.dependencies.size ()
+ << "Dependencies: " << p->dependencies.size ()
<< ~DIV
<< ~DIV;
}
@@ -172,8 +169,8 @@ namespace brep
t.commit ();
string u (mime_url_encode (name));
- if (!pr.query ().empty ())
- u += "?q=" + mime_url_encode (pr.query ());
+ if (!sq.empty ())
+ u += "?q=" + mime_url_encode (sq);
s << DIV_PAGER (pr.page (), pvc, rop, options_->pages_in_pager (), u)
<< ~BODY
diff --git a/brep/package.cxx b/brep/package.cxx
index fd40bc6..9b7d24a 100644
--- a/brep/package.cxx
+++ b/brep/package.cxx
@@ -66,6 +66,48 @@ namespace brep
external_repositories.emplace_back (move (rp));
}
+  // Build the text from which the search index is derived: keywords go
+  // into member a, summary into b, description (if present) into c, and
+  // changes into d. Keywords are the package name, version strings, tags,
+  // and licenses. A package with no internal repository yields an empty
+  // weighted_text.
+  //
+  weighted_text package::
+  search_text () const
+  {
+    weighted_text r;
+
+    if (internal_repository == nullptr)
+      return r;
+
+    // Seed the keywords with the basic package information: name and
+    // version.
+    //
+    //@@ What about 'stable' from cppget.org/stable? Add path of
+    //   the repository to keywords? Or is it too "polluting" and
+    //   we will handle it in some other way (e.g., by allowing
+    //   the user to specify repo location in the drop-down box)?
+    //   Probably drop-box would be better as also tells what are
+    //   the available internal repositories.
+    //
+    string& kw (r.a);
+    kw = id.name;
+    kw += ' ';
+    kw += version.string ();
+    kw += ' ';
+    kw += version.string (true);
+
+    // Tags are keywords as well.
+    //
+    for (const auto& t: tags)
+    {
+      kw += ' ';
+      kw += t;
+    }
+
+    // And so are licenses.
+    //
+    for (const auto& la: license_alternatives)
+    {
+      for (const auto& l: la)
+      {
+        kw += ' ';
+        kw += l;
+
+        // For a license ending with 'v' and a single digit (say, LGPLv2)
+        // the name without this suffix (LGPL) is also a keyword.
+        //
+        const size_t n (l.size ());
+
+        if (n <= 2 || l[n - 2] != 'v')
+          continue;
+
+        const char d (l[n - 1]);
+
+        if (d >= '0' && d <= '9')
+        {
+          kw += ' ';
+          kw.append (l, 0, n - 2);
+        }
+      }
+    }
+
+    r.b = summary;
+
+    if (description)
+      r.c = *description;
+
+    r.d = changes;
+    return r;
+  }
+
// repository
//
repository::
diff --git a/brep/page b/brep/page
index d14bb41..b923c7f 100644
--- a/brep/page
+++ b/brep/page
@@ -124,6 +124,20 @@ namespace brep
private:
const priority& priority_;
};
+
+  // Serializes the package search form with the specified query as the
+  // value of the search input. Note that the query string is held by
+  // reference and so must outlive this object.
+  //
+  class FORM_SEARCH
+  {
+  public:
+    FORM_SEARCH (const std::string& text)
+        : query_ (text)
+    {
+    }
+
+    void
+    operator() (xml::serializer&) const;
+
+  private:
+    const std::string& query_;
+  };
}
#endif // BREP_PAGE
diff --git a/brep/page.cxx b/brep/page.cxx
index b59cd4b..c68a7eb 100644
--- a/brep/page.cxx
+++ b/brep/page.cxx
@@ -206,4 +206,15 @@ namespace brep
<< "Priority: " << priority_names[priority_]
<< ~DIV;
}
+
+  // FORM_SEARCH
+  //
+  // Serialize the form consisting of the search input named "q", which
+  // carries the current query as its value, and the "Search" submit
+  // button.
+  //
+  void FORM_SEARCH::
+  operator() (serializer& s) const
+  {
+    s << FORM;
+    s << *INPUT(TYPE="search", NAME="q", VALUE=query_);
+    s << *INPUT(TYPE="submit", VALUE="Search");
+    s << ~FORM;
+  }
}