From 21033565488f6c63b4c40962cccfdc8b6ca32b2a Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sat, 7 Jul 2018 19:09:53 +0300 Subject: Add support for package submission --- INSTALL | 40 +- etc/brep-module.conf | 60 ++- libbrep/utility.hxx | 3 +- manifest | 1 + mod/database-module.cxx | 6 +- mod/database-module.hxx | 8 +- mod/mod-build-force.cxx | 6 +- mod/mod-build-log.cxx | 6 +- mod/mod-build-result.cxx | 13 +- mod/mod-build-task.cxx | 13 +- mod/mod-builds.cxx | 6 +- mod/mod-package-details.cxx | 6 +- mod/mod-package-search.cxx | 11 +- mod/mod-package-version-details.cxx | 6 +- mod/mod-repository-details.cxx | 6 +- mod/mod-repository-root.cxx | 123 +++++-- mod/mod-repository-root.hxx | 8 +- mod/mod-submit.cxx | 715 ++++++++++++++++++++++++++++++++++++ mod/mod-submit.hxx | 45 +++ mod/module.cxx | 68 ++-- mod/module.hxx | 32 +- mod/options.cli | 34 +- mod/page.cxx | 9 +- repositories.manifest | 4 + web/apache/request.cxx | 592 +++++++++++++++++++++++------ web/apache/request.hxx | 76 +++- web/apache/service.cxx | 4 +- web/apache/service.hxx | 68 ++-- web/apache/service.txx | 54 +-- web/buildfile | 1 + web/module.hxx | 79 ++-- www/buildfile | 6 +- www/submit-body.css | 22 ++ www/submit.css | 3 + www/submit.scss | 3 + www/submit.xhtml | 25 ++ 36 files changed, 1796 insertions(+), 366 deletions(-) create mode 100644 mod/mod-submit.cxx create mode 100644 mod/mod-submit.hxx create mode 100644 www/submit-body.css create mode 100644 www/submit.css create mode 100644 www/submit.scss create mode 100644 www/submit.xhtml diff --git a/INSTALL b/INSTALL index 4904dc7..4bfb9cd 100644 --- a/INSTALL +++ b/INSTALL @@ -4,8 +4,8 @@ you are using a systemd-based distribution. If not, then you will need to replace systemctl commands with the equivalent init.d ones. The below instructions include steps for setting up brep as the build2 build -bot controller. This functionality is optional and if not needed, then the -corresponding steps can be omitted. 
+bot controller and package submission service. Both of these functionalities +are optional and, if not needed, then the corresponding steps can be omitted. 1. Create 'brep' User @@ -60,12 +60,12 @@ b) Install PostgreSQL 9.3 or above (including the contrib package containing group, not user. However, most installations use the same name for both.] c) Install PostgreSQL and Apache2 development files. Specifically, we need - PostgreSQL's libpq and Apache's libapr and web server development files. - Below are the names of their packages for some distributions: + PostgreSQL's libpq and Apache's libapr, libapreq, and web server development + files. Below are the names of their packages for some distributions: - Debian/Ubuntu: libpq-dev libapr1-dev apache2-dev - Fedora/RHEL: posqtgresql-devel apr-devel httpd-devel - FreeBSD: postgresqlXY-client apr apache24 + Debian/Ubuntu: libpq-dev libapr1-dev libapreq2-dev apache2-dev + Fedora/RHEL: postgresql-devel apr-devel libapreq2-devel httpd-devel + FreeBSD: postgresqlXY-client apr libapreq2 apache24 d) Unless you already have the build2 toolchain installed, download (normally from https://download.build2.org) and install build2-toolchain by following @@ -99,7 +99,7 @@ bpkg create \ bpkg add https://pkg.cppget.org/1/alpha bpkg fetch -bpkg build brep ?sys:libapr1 ?sys:libpq +bpkg build brep ?sys:libapr1 ?sys:libapreq2 ?sys:libpq bpkg install brep $ cd .. # Back to brep home. @@ -217,8 +217,28 @@ $ psql -d brep_build -c 'SELECT DISTINCT name FROM build_package' $ cp install/share/brep/etc/brep-module.conf config/ $ edit config/brep-module.conf # Adjust default values if required. -Note that to enable the build2 build bot controller functionality you need to -set the build-config option in brep-module.conf. 
+ +To enable the package submission functionality you will need to specify the +submit-data and submit-temp directories in brep-module.conf. Note that these +directories must exist and have read, write, and execute permissions granted +to the www-data user. This, for example, can be achieved with the following +commands: + +$ mkdir /home/brep/submit-data +$ mkdir /home/brep/submit-temp +$ setfacl -m g:www-data:rwx /home/brep/submit-data +$ setfacl -m g:www-data:rwx /home/brep/submit-temp + +To also enable the package submission web form set the submit-form option. You +can use the installed sample submission form fragment or create a custom one +if your submission handler requires additional information (besides the +package archive and its SHA256 checksum) to be supplied by the client. For +example: + +$ cp install/share/brep/www/submit.xhtml config/ +$ edit config/submit.xhtml # Add custom form fields, adjust CSS style, etc. Here we assume you have setup an appropriate Apache2 virtual server. Open the corresponding Apache2 .conf file and add the following inside VirtualHost (you diff --git a/etc/brep-module.conf b/etc/brep-module.conf index 3d53cc7..0612969 100644 --- a/etc/brep-module.conf +++ b/etc/brep-module.conf @@ -22,6 +22,7 @@ # menu Packages= # menu Builds=?builds +# menu Submit=?submit menu About=?about @@ -133,7 +134,7 @@ menu About=?about # The maximum size of the build result manifest accepted. Note that the HTTP # POST request body is cached to retry database transactions in the face of -# recoverable failures (deadlock, loss of connection, etc). Default is 10M +# recoverable failures (deadlock, loss of connection, etc). Default is 10M. # # build-result-request-max-size 10485760 @@ -200,6 +201,63 @@ menu About=?about # +# The directory to save final submission data to. If unspecified, the package +# submission functionality will be disabled. If specified, then submit-temp +# must be specified as well. 
+# +# Note that the directory path must be absolute and the directory itself must +# exist and have read, write, and execute permissions granted to the user that +# runs the web server. +# +# submit-data + + +# The directory to save temporary submission data to. Must be specified if the +# package submission functionality is enabled. +# +# Note that this directory must be on the same filesystem and satisfy the same +# requirements as submit-data. It is also the user's responsibility to clean +# it up after an unclean web server shutdown. +# +# submit-temp + + +# The maximum size of the submission data accepted. Note that currently the +# entire submission request is read into memory. Default is 10M. +# +# submit-max-size 10485760 + + +# The package submission form fragment. If specified, then its contents are +# treated as an XHTML5 fragment that is inserted into the <body> element of +# the submission page. If unspecified, then no submission page will be +# displayed. Note that the file path must be absolute. +# +# submit-form + + +# The package submission email. If specified, the submission request and +# result manifests will be sent to this address. +# +# submit-email + + +# The handler program to be executed on package submission. The handler is +# executed as part of the submission request and is passed additional +# arguments that can be specified with submit-handler-argument followed by +# the absolute path to the submission directory. Note that the program path +# must be absolute. +# +# submit-handler + + +# Additional arguments to be passed to the submission handler program (see +# submit-handler for details). Repeat this option to specify multiple +# arguments. +# +# submit-handler-argument + + # Trace verbosity. Disabled by default. 
# # verbosity 0 diff --git a/libbrep/utility.hxx b/libbrep/utility.hxx index 0fd2fe1..e160e29 100644 --- a/libbrep/utility.hxx +++ b/libbrep/utility.hxx @@ -11,7 +11,7 @@ #include // assert() #include // make_move_iterator() -#include // reverse_iterate(), +#include // casecmp(), reverse_iterate(), // operator<<(ostream, exception) namespace brep @@ -27,6 +27,7 @@ namespace brep // // + using butl::casecmp; using butl::reverse_iterate; } diff --git a/manifest b/manifest index 65303d6..f7fc460 100644 --- a/manifest +++ b/manifest @@ -22,6 +22,7 @@ depends: * bpkg >= 0.8.0- # @@ Should probably become conditional dependency. requires: ? cli ; Only required if changing .cli files. depends: libapr1 +depends: libapreq2 depends: libstudxml [1.1.0-b.3.1 1.1.0-b.4) depends: libodb [2.5.0-b.8.1 2.5.0-b.9) depends: libodb-pgsql [2.5.0-b.8.1 2.5.0-b.9) diff --git a/mod/database-module.cxx b/mod/database-module.cxx index 22a4d1e..137d7ef 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -28,7 +28,7 @@ namespace brep // database_module:: database_module (const database_module& r) - : module (r), + : handler (r), retry_ (r.retry_), package_db_ (r.initialized_ ? r.package_db_ : nullptr), build_db_ (r.initialized_ ? r.build_db_ : nullptr), @@ -104,13 +104,13 @@ namespace brep handle (request& rq, response& rs, log& l) try { - return module::handle (rq, rs, l); + return handler::handle (rq, rs, l); } catch (const odb::recoverable& e) { if (retry_-- > 0) { - MODULE_DIAG; + HANDLER_DIAG; l1 ([&]{trace << e << "; " << retry_ + 1 << " retries left";}); throw retry (); } diff --git a/mod/database-module.hxx b/mod/database-module.hxx index 9d1ef4c..70ae004 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -22,11 +22,11 @@ namespace brep { - // A module that utilises the database. Specifically, it will retry the + // A handler that utilises the database. 
Specifically, it will retry the // request in the face of recoverable database failures (deadlock, loss of // connection, etc) up to a certain number of times. // - class database_module: public module + class database_module: public handler { protected: database_module () = default; @@ -38,10 +38,10 @@ namespace brep database_module (const database_module&); // Required to avoid getting warning from clang that - // database_module::init() hides module::init() virtual functions. This + // database_module::init() hides handler::init() virtual functions. This // way all functions get to the same scope and become overloaded set. // - using module::init; + using handler::init; // Initialize the package database instance. Throw odb::exception on // failure. diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index af47b4c..b6514ce 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -35,7 +35,7 @@ build_force (const build_force& r) void brep::build_force:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -51,7 +51,7 @@ handle (request& rq, response& rs) { using brep::version; // Not to confuse with module::version. 
- MODULE_DIAG; + HANDLER_DIAG; if (build_db_ == nullptr) throw invalid_request (501, "not implemented"); @@ -60,7 +60,7 @@ handle (request& rq, response& rs) try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (8 * 1024)); params = params::build_force (s, unknown_mode::fail, unknown_mode::fail); } catch (const cli::exception& e) diff --git a/mod/mod-build-log.cxx b/mod/mod-build-log.cxx index c1eec4c..ab9ab12 100644 --- a/mod/mod-build-log.cxx +++ b/mod/mod-build-log.cxx @@ -37,7 +37,7 @@ build_log (const build_log& r) void brep::build_log:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -56,7 +56,7 @@ handle (request& rq, response& rs) { using brep::version; // Not to confuse with module::version. - MODULE_DIAG; + HANDLER_DIAG; if (build_db_ == nullptr) throw invalid_request (501, "not implemented"); @@ -146,7 +146,7 @@ handle (request& rq, response& rs) // try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (1024)); params::build_log (s, unknown_mode::fail, unknown_mode::fail); } catch (const cli::exception& e) diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index 7891fe1..65e8425 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -46,7 +46,7 @@ build_result (const build_result& r) void brep::build_result:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -68,7 +68,7 @@ handle (request& rq, response&) { using brep::version; // Not to confuse with module::version. - MODULE_DIAG; + HANDLER_DIAG; if (build_db_ == nullptr) throw invalid_request (501, "not implemented"); @@ -77,7 +77,10 @@ handle (request& rq, response&) // try { - name_value_scanner s (rq.parameters ()); + // Note that we expect the result request manifest to be posted and so + // consider parameters from the URL only. 
+ // + name_value_scanner s (rq.parameters (0 /* limit */, true /* url_only */)); params::build_result (s, unknown_mode::fail, unknown_mode::fail); } catch (const cli::exception& e) @@ -89,6 +92,10 @@ handle (request& rq, response&) try { + // We fully cache the request content to be able to retry the request + // handling if odb::recoverable is thrown (see database-module.cxx for + // details). + // size_t limit (options_->build_result_request_max_size ()); manifest_parser p (rq.content (limit, limit), "result_request_manifest"); rqm = result_request_manifest (p); diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index f1e4cdb..4e56d02 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -52,7 +52,7 @@ build_task (const build_task& r) void brep::build_task:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -81,7 +81,7 @@ init (scanner& s) bool brep::build_task:: handle (request& rq, response& rs) { - MODULE_DIAG; + HANDLER_DIAG; if (build_db_ == nullptr) throw invalid_request (501, "not implemented"); @@ -90,7 +90,10 @@ handle (request& rq, response& rs) try { - name_value_scanner s (rq.parameters ()); + // Note that we expect the task request manifest to be posted and so + // consider parameters from the URL only. + // + name_value_scanner s (rq.parameters (0 /* limit */, true /* url_only */)); params = params::build_task (s, unknown_mode::fail, unknown_mode::fail); } catch (const cli::exception& e) @@ -102,6 +105,10 @@ handle (request& rq, response& rs) try { + // We fully cache the request content to be able to retry the request + // handling if odb::recoverable is thrown (see database-module.cxx for + // details). 
+ // size_t limit (options_->build_task_request_max_size ()); manifest_parser p (rq.content (limit, limit), "task_request_manifest"); tqm = task_request_manifest (p); diff --git a/mod/mod-builds.cxx b/mod/mod-builds.cxx index e43739f..f255b25 100644 --- a/mod/mod-builds.cxx +++ b/mod/mod-builds.cxx @@ -50,7 +50,7 @@ builds (const builds& r) void brep::builds:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -264,7 +264,7 @@ handle (request& rq, response& rs) using brep::version; using namespace web::xhtml; - MODULE_DIAG; + HANDLER_DIAG; if (build_db_ == nullptr) throw invalid_request (501, "not implemented"); @@ -277,7 +277,7 @@ handle (request& rq, response& rs) try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (8 * 1024)); params = params::builds (s, unknown_mode::fail, unknown_mode::fail); } catch (const cli::exception& e) diff --git a/mod/mod-package-details.cxx b/mod/mod-package-details.cxx index a348d95..ffd0ae7 100644 --- a/mod/mod-package-details.cxx +++ b/mod/mod-package-details.cxx @@ -37,7 +37,7 @@ package_details (const package_details& r) void brep::package_details:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -69,7 +69,7 @@ handle (request& rq, response& rs) using namespace web; using namespace web::xhtml; - MODULE_DIAG; + HANDLER_DIAG; const size_t res_page (options_->search_results ()); const dir_path& root (options_->root ()); @@ -79,7 +79,7 @@ handle (request& rq, response& rs) try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (8 * 1024)); params = params::package_details ( s, unknown_mode::fail, unknown_mode::fail); diff --git a/mod/mod-package-search.cxx b/mod/mod-package-search.cxx index d7a2b98..d53397e 100644 --- a/mod/mod-package-search.cxx +++ b/mod/mod-package-search.cxx @@ -38,7 +38,7 @@ package_search (const 
package_search& r) void brep::package_search:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -82,7 +82,7 @@ handle (request& rq, response& rs) { using namespace web::xhtml; - MODULE_DIAG; + HANDLER_DIAG; const size_t res_page (options_->search_results ()); const dir_path& root (options_->root ()); @@ -92,7 +92,7 @@ handle (request& rq, response& rs) try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (8 * 1024)); params = params::package_search ( s, unknown_mode::fail, unknown_mode::fail); } @@ -125,7 +125,10 @@ handle (request& rq, response& rs) // element of the search form. The problem appears in Firefox and has a // (4-year old, at the time of this writing) bug report: // - // https://bugzilla.mozilla.org/show_bug.cgi?id=712130. + // https://bugzilla.mozilla.org/show_bug.cgi?id=712130 + // + // @@ An update: claimed to be fixed in Firefox 60 that is released in + // May 2018. Is it time to cleanup? Remember to cleanup in all places. // << SCRIPT << " " << ~SCRIPT << ~HEAD diff --git a/mod/mod-package-version-details.cxx b/mod/mod-package-version-details.cxx index cef9357..833b802 100644 --- a/mod/mod-package-version-details.cxx +++ b/mod/mod-package-version-details.cxx @@ -43,7 +43,7 @@ package_version_details (const package_version_details& r) void brep::package_version_details:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -66,7 +66,7 @@ handle (request& rq, response& rs) using namespace web::xhtml; using brep::version; // Not to confuse with module::version. 
- MODULE_DIAG; + HANDLER_DIAG; const string& host (options_->host ()); const dir_path& root (options_->root ()); @@ -101,7 +101,7 @@ handle (request& rq, response& rs) try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (1024)); params = params::package_version_details ( s, unknown_mode::fail, unknown_mode::fail); diff --git a/mod/mod-repository-details.cxx b/mod/mod-repository-details.cxx index 6043328..36d5508 100644 --- a/mod/mod-repository-details.cxx +++ b/mod/mod-repository-details.cxx @@ -41,7 +41,7 @@ repository_details (const repository_details& r) void brep::repository_details:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -57,7 +57,7 @@ handle (request& rq, response& rs) { using namespace web::xhtml; - MODULE_DIAG; + HANDLER_DIAG; const dir_path& root (options_->root ()); @@ -65,7 +65,7 @@ handle (request& rq, response& rs) // try { - name_value_scanner s (rq.parameters ()); + name_value_scanner s (rq.parameters (1024)); params::repository_details (s, unknown_mode::fail, unknown_mode::fail); } catch (const cli::exception& e) diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx index aaf6988..367b137 100644 --- a/mod/mod-repository-root.cxx +++ b/mod/mod-repository-root.cxx @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -27,30 +28,66 @@ using namespace brep::cli; namespace brep { - // request_proxy + // Request proxy. Removes the first parameter that is assumed to be a + // function name. 
// class request_proxy: public request { public: - request_proxy (request& r, const name_values& p) - : request_ (r), parameters_ (p) {} + request_proxy (request& r): request_ (r) {} virtual const path_type& path () {return request_.path ();} virtual const name_values& - parameters () {return parameters_;} + parameters (size_t limit, bool url_only) + { + if (!parameters_ || url_only < url_only_parameters_) + { + parameters_ = request_.parameters (limit, url_only); + + assert (!parameters_->empty ()); // Always starts with a function name. + parameters_->erase (parameters_->begin ()); + + url_only_parameters_ = url_only; + } + + return *parameters_; + } + + istream& + open_upload (size_t index) + { + // The original request object still contains the function name entry, + // so we shift the index. + // + return request_.open_upload (index + 1); + } + + istream& + open_upload (const string& name) + { + // We don't expect the function name here as a parameter name. + // + return request_.open_upload (name); + } + + virtual const name_values& + headers () {return request_.headers ();} virtual const name_values& cookies () {return request_.cookies ();} virtual istream& - content (size_t limit, size_t buffer) { - return request_.content (limit, buffer);} + content (size_t limit, size_t buffer) + { + return request_.content (limit, buffer); + } private: request& request_; - const name_values& parameters_; + optional parameters_; + bool url_only_parameters_; // Meaningless if parameters_ is not present. 
}; // repository_root @@ -65,15 +102,16 @@ namespace brep build_result_ (make_shared ()), build_force_ (make_shared ()), build_log_ (make_shared ()), - builds_ (make_shared ()) + builds_ (make_shared ()), + submit_ (make_shared ()) { } repository_root:: repository_root (const repository_root& r) - : module (r), + : handler (r), // - // Deep/shallow-copy sub-modules depending on whether this is an + // Deep/shallow-copy sub-handlers depending on whether this is an // exemplar/handler. // package_search_ ( @@ -113,6 +151,10 @@ namespace brep r.initialized_ ? r.builds_ : make_shared (*r.builds_)), + submit_ ( + r.initialized_ + ? r.submit_ + : make_shared (*r.submit_)), options_ ( r.initialized_ ? r.options_ @@ -120,13 +162,13 @@ namespace brep { } - // Return amalgamation of repository_root and all its sub-modules option + // Return amalgamation of repository_root and all its sub-handlers option // descriptions. // option_descriptions repository_root:: options () { - option_descriptions r (module::options ()); + option_descriptions r (handler::options ()); append (r, package_search_->options ()); append (r, package_details_->options ()); append (r, package_version_details_->options ()); @@ -136,18 +178,19 @@ namespace brep append (r, build_force_->options ()); append (r, build_log_->options ()); append (r, builds_->options ()); + append (r, submit_->options ()); return r; } - // Initialize sub-modules and parse own configuration options. + // Initialize sub-handlers and parse own configuration options. // void repository_root:: init (const name_values& v) { - auto sub_init = [this, &v] (module& m, const char* name) + auto sub_init = [this, &v] (handler& m, const char* name) { - // Initialize sub-module. Intercept exception handling to add sub-module - // attribution. + // Initialize sub-handler. Intercept exception handling to add + // sub-handler attribution. // try { @@ -167,7 +210,7 @@ namespace brep } }; - // Initialize sub-modules. + // Initialize sub-handlers. 
// sub_init (*package_search_, "package_search"); sub_init (*package_details_, "package_details"); @@ -178,17 +221,18 @@ namespace brep sub_init (*build_force_, "build_force"); sub_init (*build_log_, "build_log"); sub_init (*builds_, "builds"); + sub_init (*submit_, "submit"); // Parse own configuration options. // - module::init ( + handler::init ( filter (v, convert (options::repository_root::description ()))); } void repository_root:: init (scanner& s) { - MODULE_DIAG; + HANDLER_DIAG; options_ = make_shared ( s, unknown_mode::fail, unknown_mode::fail); @@ -197,7 +241,7 @@ namespace brep options_->root (dir_path ("/")); // To use libbutl timestamp printing functions later on (specifically in - // sub-modules, while handling requests). + // sub-handlers, while handling requests). // tzset (); } @@ -205,7 +249,7 @@ namespace brep bool repository_root:: handle (request& rq, response& rs) { - MODULE_DIAG; + HANDLER_DIAG; const dir_path& root (options_->root ()); @@ -215,24 +259,21 @@ namespace brep const path& lpath (rpath.leaf (root)); - // Delegate the request handling to the selected sub-module. Intercept - // exception handling to add sub-module attribution. + // Delegate the request handling to the selected sub-handler. Intercept + // exception handling to add sub-handler attribution. // auto handle = [&rq, &rs, this] (const char* nm, bool fn = false) -> bool { try { - // Delegate the handling straight away if the sub-module is not a + // Delegate the handling straight away if the sub-handler is not a // function. Otherwise, cleanup the request not to confuse the - // sub-module with the unknown parameter. + // sub-handler with the unknown parameter. 
// if (!fn) return handler_->handle (rq, rs, *log_); - name_values p (rq.parameters ()); - p.erase (p.begin ()); - - request_proxy rp (rq, p); + request_proxy rp (rq); return handler_->handle (rp, rs, *log_); } catch (const invalid_request&) @@ -250,7 +291,7 @@ namespace brep // to the client with the internal server error (500) code. By that // reason it is valid to reduce all these types to a single one. Note // that the server_error exception is handled internally by the - // module::handle() function call. + // handler::handle() function call. // ostringstream os; os << nm << ": " << e; @@ -258,22 +299,23 @@ namespace brep } }; - // Note that while selecting the sub-module type for handling the request, + // Note that while selecting the sub-handler type for handling the request, // we rely on the fact that the initial and all the subsequent function // calls (that may take place after the retry exception is thrown) will // end-up with the same type, and so using the single handler instance for // all of these calls is safe. Note that the selection also sets up the - // handling context (sub-module name and optionally the request proxy). + // handling context (sub-handler name and optionally the request proxy). // if (lpath.empty ()) { // Dispatch request handling to the repository_details or the one of - // build_* modules depending on the function name passed as a first HTTP + // build_* handlers depending on the function name passed as a first HTTP // request parameter (example: cppget.org/?about). Dispatch to the - // package_search module if the function name is unavailable (no + // package_search handler if the function name is unavailable (no // parameters) or is not recognized. 
// - const name_values& params (rq.parameters ()); + const name_values& params (rq.parameters (0 /* limit */, + true /* url_only */)); if (!params.empty ()) { const string& fn (params.front ().name); @@ -313,6 +355,13 @@ namespace brep return handle ("builds", true); } + else if (fn == "submit") + { + if (handler_ == nullptr) + handler_.reset (new submit (*submit_)); + + return handle ("submit", true); + } } if (handler_ == nullptr) @@ -323,7 +372,7 @@ namespace brep else { // Dispatch request handling to the package_details, the - // package_version_details or the build_log module depending on the HTTP + // package_version_details or the build_log handler depending on the HTTP // request URL path. // auto i (lpath.begin ()); @@ -380,7 +429,7 @@ namespace brep void repository_root:: version () { - MODULE_DIAG; + HANDLER_DIAG; info << "module " << BREP_VERSION_ID << ", libbrep " << LIBBREP_VERSION_ID diff --git a/mod/mod-repository-root.hxx b/mod/mod-repository-root.hxx index 70840ae..74691ea 100644 --- a/mod/mod-repository-root.hxx +++ b/mod/mod-repository-root.hxx @@ -22,8 +22,9 @@ namespace brep class build_force; class build_log; class builds; + class submit; - class repository_root: public module + class repository_root: public handler { public: repository_root (); @@ -65,14 +66,15 @@ namespace brep shared_ptr build_force_; shared_ptr build_log_; shared_ptr builds_; + shared_ptr submit_; shared_ptr options_; - // Sub-module the request is dispatched to. Initially is NULL. It is set + // Sub-handler the request is dispatched to. Initially is NULL. It is set // by the first call to handle() to a deep copy of the selected exemplar. // The subsequent calls of handle() (that may take place after the retry // exception is thrown) will use the existing handler instance. 
// - unique_ptr handler_; + unique_ptr handler_; }; } diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx new file mode 100644 index 0000000..ff5fa9d --- /dev/null +++ b/mod/mod-submit.cxx @@ -0,0 +1,715 @@ +// file : mod/mod-submit.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // strtoul() +#include + +#include +#include +#include +#include +#include +#include // operator<<(ostream, process_args) +#include +#include + +#include +#include + +#include +#include + +using namespace std; +using namespace butl; +using namespace web; +using namespace brep::cli; + +brep::submit:: +submit (const submit& r) + : handler (r), + options_ (r.initialized_ ? r.options_ : nullptr), + form_ (r.initialized_ || r.form_ == nullptr + ? r.form_ + : make_shared (*r.form_)) +{ +} + +void brep::submit:: +init (scanner& s) +{ + HANDLER_DIAG; + + options_ = make_shared ( + s, unknown_mode::fail, unknown_mode::fail); + + // Verify that the submission handling is set up properly, if configured. + // + if (options_->submit_data_specified ()) + { + // Verify that the directories satisfy the requirements. + // + auto verify = [&fail] (const dir_path& d, const char* what) + { + if (d.relative ()) + fail << what << " directory path must be absolute"; + + if (!dir_exists (d)) + fail << what << " directory '" << d << "' does not exist"; + }; + + verify (options_->submit_data (), "submit-data"); + verify (options_->submit_temp (), "submit-temp"); + + // Parse the XHTML5 form file, if configured. 
+ // + if (options_->submit_form_specified ()) + { + const path& submit_form (options_->submit_form ()); + + if (submit_form.relative ()) + fail << "submit-form path must be absolute"; + + try + { + ifdstream is (submit_form); + + form_ = make_shared (is.read_text (), + submit_form.string ()); + } + catch (const xml::parsing& e) + { + fail << "unable to parse submit-form file: " << e; + } + catch (const io_error& e) + { + fail << "unable to read submit-form file '" << submit_form << "': " + << e; + } + } + + if (options_->submit_handler_specified () && + options_->submit_handler ().relative ()) + fail << "submit-handler path must be absolute"; + } + + if (options_->root ().empty ()) + options_->root (dir_path ("/")); +} + +bool brep::submit:: +handle (request& rq, response& rs) +{ + using namespace xhtml; + + using parser = manifest_parser; + using parsing = manifest_parsing; + using serializer = manifest_serializer; + using serialization = manifest_serialization; + + HANDLER_DIAG; + + const dir_path& root (options_->root ()); + + // We will respond with the manifest to the submission protocol violations + // and with a plain text message on the internal errors. In the latter case + // we will always respond with the same neutral message for security reasons, + // logging the error details. Note that descriptions of exceptions caught by + // the web server are returned to the client (see web/module.hxx for + // details), and we want to avoid this when there is a danger of exposing + // sensitive data. + // + // Also we will pass through exceptions thrown by the underlying API, unless + // we need to handle them or add details for the description, in which case + // we will fall back to one of the above-mentioned response methods. + // + // Note that both respond_manifest() and respond_error() are normally called + // right before the end of the request handling. 
They both always return + // true to allow bailing out with a single line, for example: + // + // return respond_error (); // Request is handled with an error. + // + auto respond_manifest = [&rs] (status_code status, + const string& message, + const char* ref = nullptr) -> bool + { + serializer s (rs.content (status, "text/manifest;charset=utf-8"), + "response"); + + s.next ("", "1"); // Start of manifest. + s.next ("status", to_string (status)); + s.next ("message", message); + + if (ref != nullptr) + s.next ("reference", ref); + + s.next ("", ""); // End of manifest. + return true; + }; + + auto respond_error = [&rs] (status_code status = 500) -> bool + { + rs.content (status, "text/plain;charset=utf-8") << "unable to handle " + << "submission" << endl; + return true; + }; + + // Check if the package submission functionality is enabled. + // + // Note that this is not a submission protocol violation but it feels right + // to respond with the manifest, to help the client a bit. + // + if (!options_->submit_data_specified ()) + return respond_manifest (404, "submission disabled"); + + // Parse the request form data and verify the submission size limit. + // + // Note that if it is exceeded, then there are parameters and this is the + // submission rather than the form request, and so we respond with the + // manifest. + // + try + { + rq.parameters (options_->submit_max_size ()); + } + catch (const invalid_request& e) + { + if (e.status == 413) // Payload too large? + return respond_manifest (e.status, "submission size exceeds limit"); + + throw; + } + + // The request parameters are now parsed and the limit doesn't really matter. + // + const name_values& rps (rq.parameters (0 /* limit */)); + + // If there are no request parameters then we respond with the submission + // form XHTML, if configured. Otherwise, we will proceed as for the submission + // request and will fail (missing parameters). 
+  //
+  if (rps.empty () && form_ != nullptr)
+  {
+    const string title ("Submit");
+
+    xml::serializer s (rs.content (), title);
+
+    s << HTML
+      <<   HEAD
+      <<     TITLE << title << ~TITLE
+      <<     CSS_LINKS (path ("submit.css"), root)
+      <<   ~HEAD
+      <<   BODY
+      <<     DIV_HEADER (root, options_->logo (), options_->menu ())
+      <<     DIV(ID="content") << *form_ << ~DIV
+      <<   ~BODY
+      << ~HTML;
+
+    return true;
+  }
+
+  // Verify the submission parameters we expect. The unknown ones will be
+  // serialized to the submission manifest.
+  //
+  params::submit params;
+
+  try
+  {
+    name_value_scanner s (rps);
+    params = params::submit (s, unknown_mode::skip, unknown_mode::skip);
+  }
+  catch (const cli::exception&)
+  {
+    return respond_manifest (400, "invalid parameter");
+  }
+
+  const string& archive (params.archive ());
+  const string& sha256sum (params.sha256sum ());
+
+  if (archive.empty ())
+    return respond_manifest (400, "package archive expected");
+
+  if (sha256sum.empty ())
+    return respond_manifest (400, "package archive checksum expected");
+
+  // Besides having the SHA256 length the checksum must consist of the
+  // hexadecimal digits only. Note that its abbreviated form is used below as
+  // a name of the submission directories, so letting anything else through
+  // (think of '/' and "..") would allow the client to make us probe, create,
+  // and remove directories outside of submit-data and submit-temp.
+  //
+  if (sha256sum.size () != 64 ||
+      sha256sum.find_first_not_of ("0123456789abcdefABCDEF") != string::npos)
+    return respond_manifest (400, "invalid package archive checksum");
+
+  // Verify that unknown parameter values satisfy the requirements (contain
+  // only ASCII printable characters plus '\r', '\n', and '\t').
+  //
+  // Actually, the expected ones must satisfy too, so check them as well.
+  //
+  auto printable = [] (const string& s) -> bool
+  {
+    for (char c: s)
+    {
+      if (!((c >= 0x20 && c <= 0x7E) || c == '\n' || c == '\r' || c == '\t'))
+        return false;
+    }
+    return true;
+  };
+
+  for (const name_value& nv: rps)
+  {
+    if (nv.value && !printable (*nv.value))
+      return respond_manifest (400, "invalid parameter " + nv.name);
+  }
+
+  // Check for a duplicate submission.
+  //
+  // Respond with the conflict (409) code if a duplicate is found.
+  //
+  string ac (sha256sum, 0, 12);
+  dir_path dd (options_->submit_data () / dir_path (ac));
+
+  if (dir_exists (dd))
+    return respond_manifest (409, "duplicate submission");
+
+  // Create the temporary submission directory.
+  //
+  dir_path td;
+
+  try
+  {
+    // Note that providing a meaningful prefix for temp_name() is not really
+    // required as the temporary directory is used by brep exclusively. However,
+    // using the abbreviated checksum can be helpful for troubleshooting.
+    //
+    td = dir_path (options_->submit_temp () /
+                   dir_path (path::traits::temp_name (ac)));
+
+    // It's highly unlikely but still possible that the temporary directory
+    // already exists. This can only happen due to the unclean web server
+    // shutdown. Let's remove it and retry.
+    //
+    if (try_mkdir (td) == mkdir_status::already_exists)
+    {
+      try_rmdir_r (td);
+
+      if (try_mkdir (td) == mkdir_status::already_exists)
+        throw_generic_error (EEXIST);
+    }
+  }
+  catch (const invalid_path&)
+  {
+    return respond_manifest (400, "invalid package archive checksum");
+  }
+  catch (const system_error& e)
+  {
+    error << "unable to create directory '" << td << "': " << e;
+    return respond_error ();
+  }
+
+  auto_rmdir tdr (td);
+
+  // Save the package archive into the temporary directory and verify its
+  // checksum.
+  //
+  // Note that the archive file name can potentially contain directory path
+  // in the client's form (e.g., Windows), so let's strip it if that's the
+  // case.
+  //
+  path a;
+  path af;
+
+  try
+  {
+    size_t n (archive.find_last_of ("\\/"));
+    a = path (n != string::npos ? string (archive, n + 1) : archive);
+
+    // Make sure that what is left names a file inside the temporary
+    // directory (think of the "dir/" and ".." values) rather than failing
+    // later with the internal error while trying to write to it.
+    //
+    // Also reject line breaks and tabs since the name is later embedded into
+    // the one-line "new package submission ..." string that is passed to
+    // sendmail (presumably as the message subject).
+    //
+    const string& s (a.string ());
+
+    if (s.empty () || s == "." || s == ".." ||
+        s.find_first_of ("\r\n\t") != string::npos)
+      return respond_manifest (400, "invalid package archive name");
+
+    af = td / a;
+  }
+  catch (const invalid_path&)
+  {
+    return respond_manifest (400, "invalid package archive name");
+  }
+
+  try
+  {
+    istream& is (rq.open_upload ("archive"));
+
+    // Note that istream::read() sets failbit if unable to read the requested
+    // number of bytes.
+    //
+    is.exceptions (istream::badbit);
+
+    sha256 sha;
+    char buf[8192];
+    ofdstream os (af, ios::binary);
+
+    while (!eof (is))
+    {
+      is.read (buf, sizeof (buf));
+
+      if (size_t n = is.gcount ())
+      {
+        sha.append (buf, n);
+        os.write (buf, n);
+      }
+    }
+
+    os.close ();
+
+    if (sha.string () != sha256sum)
+      return respond_manifest (400, "package archive checksum mismatch");
+  }
+  // Note that invalid_argument (thrown by open_upload() function call) can
+  // mean either no archive upload or multiple archive uploads.
+  //
+  catch (const invalid_argument&)
+  {
+    return respond_manifest (400, "package archive upload expected");
+  }
+  catch (const io_error& e)
+  {
+    error << "unable to write package archive '" << af << "': " << e;
+    return respond_error ();
+  }
+
+  // Serialize the submission request manifest to a stream. On the
+  // serialization error respond to the client with the manifest containing
+  // the bad request (400) code and return false, on the stream error pass
+  // through the io_error exception, otherwise return true.
+  //
+  timestamp ts (system_clock::now ());
+
+  auto rqm = [&a, &sha256sum, &ts, &rq, &rps, &respond_manifest]
+             (ostream& os) -> bool
+  {
+    try
+    {
+      serializer s (os, "request");
+
+      // Serialize the submission manifest header.
+      //
+      s.next ("", "1");                // Start of manifest.
+      s.next ("archive", a.string ());
+      s.next ("sha256sum", sha256sum);
+
+      s.next ("timestamp",
+              butl::to_string (ts,
+                               "%Y-%m-%dT%H:%M:%SZ",
+                               false /* special */,
+                               false /* local */));
+
+      // Serialize the User-Agent HTTP header and the client IP address.
+      //
+      // Note that the header values are optional strings (they are
+      // dereferenced and passed as the manifest values below).
+      //
+      optional<string> ip;
+      optional<string> ua;
+      for (const name_value& h: rq.headers ())
+      {
+        if (casecmp (h.name, ":Client-IP") == 0)
+          ip = h.value;
+        else if (casecmp (h.name, "User-Agent") == 0)
+          ua = h.value;
+      }
+
+      if (ip)
+        s.next ("client-ip", *ip);
+
+      if (ua)
+        s.next ("user-agent", *ua);
+
+      // Serialize the request parameters.
+ // + // Note that the serializer constrains the parameter names (can't start + // with '#', can't contain ':' and the whitespaces, etc.). + // + for (const name_value& nv: rps) + { + const string& n (nv.name); + if (n != "archive" && n != "sha256sum") + s.next (n, nv.value ? *nv.value : ""); + } + + s.next ("", ""); // End of manifest. + return true; + } + catch (const serialization& e) + { + respond_manifest (400, string ("invalid parameter: ") + e.what ()); + return false; + } + }; + + // Serialize the submission request manifest to the temporary submission + // directory. + // + path rqf (td / "request.manifest"); + + try + { + ofdstream os (rqf); + bool r (rqm (os)); + os.close (); + + if (!r) + return true; // The manifest response has already been sent to the client. + } + catch (const io_error& e) + { + error << "unable to write to '" << rqf << "': " << e; + return respond_error (); + } + + // Make the temporary submission directory permanent. + // + // Respond with the conflict (409) code if a submission race is detected. + // + try + { + mvdir (td, dd); + } + catch (const system_error& e) + { + int ec (e.code ().value ()); + if (ec == ENOTEMPTY || ec == EEXIST) + return respond_manifest (409, "duplicate submission"); + + error << "unable to rename directory '" << td << "' to '" << dd << "': " + << e; + + return respond_error (); + } + + // Given that the submission data is now successfully persisted we are no + // longer in charge of removing it, even in case of a subsequent error. + // + tdr.cancel (); + + auto print_args = [&trace, this] (const char* args[], size_t n) + { + l2 ([&]{trace << process_args {args, n};}); + }; + + // Run the submission handler, if specified, reading the result manifest + // from its stdout and caching it as a name/value pair list for later use + // (forwarding to the client, sending via email, etc.). + // + // Note that if the handler is configured then the cache can never be empty, + // containing at least the status value. 
Thus, an empty cache indicates that
+  // the handler is not configured.
+  //
+  // Note also that the status code is only assigned (and later used) if the
+  // cache is not empty.
+  //
+  status_code sc;
+  vector<manifest_name_value> rvs;
+
+  if (options_->submit_handler_specified ())
+  {
+    const path& handler (options_->submit_handler ());
+
+    for (;;) // Breakout loop.
+    try
+    {
+      fdpipe pipe (fdopen_pipe ()); // Can throw io_error.
+
+      // Redirect the diagnostics to the web server error log.
+      //
+      process pr (
+        process_start_callback (print_args,
+                                0 /* stdin */,
+                                pipe /* stdout */,
+                                2 /* stderr */,
+                                handler,
+                                options_->submit_handler_argument (),
+                                dd));
+      pipe.out.close ();
+
+      try
+      {
+        ifdstream is (move (pipe.in));
+
+        // Parse and verify the manifest. Obtain the HTTP status code (must go
+        // first) and cache it for the subsequent responding to the client.
+        //
+        parser p (is, "handler");
+        manifest_name_value nv (p.next ());
+
+        auto bad_name ([&p, &nv] (const string& d) {
+            throw parsing (p.name (), nv.name_line, nv.name_column, d);});
+
+        auto bad_value ([&p, &nv] (const string& d) {
+            throw parsing (p.name (), nv.value_line, nv.value_column, d);});
+
+        // Note that these refer to the members of nv and so track every pair
+        // subsequently assigned to it.
+        //
+        const string& n (nv.name);
+        const string& v (nv.value);
+
+        // Make sure this is the start and we support the version.
+        //
+        if (!n.empty ())
+          bad_name ("start of manifest expected");
+
+        if (v != "1")
+          bad_value ("unsupported format version");
+
+        // Cache start of manifest.
+        //
+        rvs.push_back (move (nv));
+
+        // Get and verify the HTTP status.
+        //
+        // Note that it is the name that is unexpected here, so point the
+        // diagnostics at the name rather than at the value.
+        //
+        nv = p.next ();
+        if (n != "status")
+          bad_name ("no status specified");
+
+        char* e (nullptr);
+        unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw.
+
+        assert (e != nullptr);
+
+        if (!(*e == '\0' && c >= 100 && c < 600))
+          bad_value ("invalid http status '" + v + "'");
+
+        // Cache the HTTP status.
+        //
+        sc = static_cast<status_code> (c);
+        rvs.push_back (move (nv));
+
+        // Cache the remaining name/value pairs.
+        //
+        for (nv = p.next (); !nv.empty (); nv = p.next ())
+          rvs.push_back (move (nv));
+
+        // Cache end of manifest.
+ // + rvs.push_back (move (nv)); + + is.close (); + + if (pr.wait ()) + break; // Get out of the breakout loop. + + assert (pr.exit); + error << "process " << handler << " " << *pr.exit; + + // Fall through to respond_error() below. + } + catch (const parsing& e) + { + if (pr.wait ()) + error << "unable to parse handler's output: " << e; + + // Fall through to respond_error() below. + } + catch (const io_error& e) + { + if (pr.wait ()) + error << "unable to read handler's output: " << e; + + // Fall through to respond_error() below. + } + + return respond_error (); + } + // Handle process_error and io_error (both derive from system_error). + // + catch (const system_error& e) + { + error << "unable to execute '" << handler << "': " << e; + return respond_error (); + } + } + + // Serialize the submission result manifest to a stream. On the + // serialization error log the error description and return false, on the + // stream error pass through the io_error exception, otherwise return true. + // + auto rsm = [&rvs, &error] (ostream& os) -> bool + { + assert (!rvs.empty ()); + + try + { + serializer s (os, "result"); + for (const manifest_name_value& nv: rvs) + s.next (nv.name, nv.value); + + return true; + } + catch (const serialization& e) + { + error << "unable to serialize handler's output: " << e; + return false; + } + }; + + // Save the result manifest, if generated, into the submission directory + // if it still exists (note that the handler could move or remove it). + // + path rsf (dd / "result.manifest"); + + if (!rvs.empty () && dir_exists (dd)) + try + { + ofdstream os (rsf); + bool r (rsm (os)); + os.close (); + + if (!r) + return respond_error (); // The error description is already logged. + } + catch (const io_error& e) + { + error << "unable to write to '" << rsf << "': " << e; + return respond_error (); + } + + // Send email, if configured. 
+ // + // Note that we don't consider the email sending failure to be a submission + // failure as the submission data is successfully persisted and the handler + // is successfully executed, if configured. One can argue that email can be + // essential for the submission processing and missing it would result in + // an incomplete submission. In this case it's natural to assume that the + // web server error log is monitored and the email sending failure will be + // noticed. + // + if (options_->submit_email_specified ()) + try + { + // Redirect the diagnostics to the web server error log. + // + sendmail sm (print_args, + 2 /* stderr */, + options_->email (), + "new package submission " + a.string () + " (" + ac + ")", + {options_->submit_email ()}); + + // Write the submission request manifest. + // + bool r (rqm (sm.out)); + assert (r); // The serialization succeeded once, so can't fail now. + + // Write the submission result manifest, if present. + // + if (!rvs.empty ()) + { + sm.out << "\n\n"; + + rsm (sm.out); // We don't care about the result (see above). + } + + sm.out.close (); + + if (!sm.wait ()) + error << "sendmail " << *sm.exit; + } + // Handle process_error and io_error (both derive from system_error). + // + catch (const system_error& e) + { + error << "sendmail error: " << e; + } + + // Respond with the implied result manifest if the handler is not configured. + // + if (rvs.empty ()) + return respond_manifest (200, "submission queued", ac.c_str ()); + + if (!rsm (rs.content (sc, "text/manifest;charset=utf-8"))) + return respond_error (); // The error description is already logged. 
+ + return true; +} diff --git a/mod/mod-submit.hxx b/mod/mod-submit.hxx new file mode 100644 index 0000000..ea83e03 --- /dev/null +++ b/mod/mod-submit.hxx @@ -0,0 +1,45 @@ +// file : mod/mod-submit.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_SUBMIT_HXX +#define MOD_MOD_SUBMIT_HXX + +#include + +#include +#include + +#include +#include + +namespace brep +{ + class submit: public handler + { + public: + submit () = default; + + // Create a shallow copy (handling instance) if initialized and a deep + // copy (context exemplar) otherwise. + // + explicit + submit (const submit&); + + virtual bool + handle (request&, response&); + + virtual const cli::options& + cli_options () const {return options::submit::description ();} + + private: + virtual void + init (cli::scanner&); + + private: + shared_ptr options_; // Parsed configuration, set by init(). + shared_ptr form_; // Parsed submit-form; stays NULL unless init() loads it. + }; +} + +#endif // MOD_MOD_SUBMIT_HXX diff --git a/mod/module.cxx b/mod/module.cxx index 8a3f78b..82fc312 100644 --- a/mod/module.cxx +++ b/mod/module.cxx @@ -21,9 +21,9 @@ using namespace placeholders; // For std::bind's _1, etc. 
namespace brep { - // module + // handler // - bool module:: + bool handler:: handle (request& rq, response& rs, log& l) { log_ = &l; @@ -74,7 +74,7 @@ namespace brep return true; } - option_descriptions module:: + option_descriptions handler:: convert (const cli::options& o) { option_descriptions r; @@ -82,7 +82,7 @@ namespace brep return r; } - void module:: + void handler:: append (option_descriptions& dst, const cli::options& src) { for (const auto& o: src) @@ -99,7 +99,7 @@ namespace brep } } - void module:: + void handler:: append (option_descriptions& dst, const option_descriptions& src) { for (const auto& o: src) @@ -109,7 +109,7 @@ namespace brep } } - name_values module:: + name_values handler:: filter (const name_values& v, const option_descriptions& d) { name_values r; @@ -123,20 +123,20 @@ namespace brep } // Convert CLI option descriptions to the general interface of option - // descriptions, extend with brep::module own option descriptions. + // descriptions, extend with brep::handler own option descriptions. // - option_descriptions module:: + option_descriptions handler:: options () { option_descriptions r ({{"conf", true}}); - append (r, options::module::description ()); + append (r, options::handler::description ()); append (r, cli_options ()); return r; } // Expand option list parsing configuration files. // - name_values module:: + name_values handler:: expand_options (const name_values& v) { using namespace cli; @@ -175,14 +175,14 @@ namespace brep } // Parse options with a cli-generated scanner. Options verb and conf are - // recognized by brep::module::init while others to be interpreted by the + // recognized by brep::handler::init while others to be interpreted by the // derived init(). 
If there is an option which can not be interpreted - // neither by brep::module nor by the derived class, then the web server + // neither by brep::handler nor by the derived class, then the web server // is terminated with a corresponding error message being logged. Though // this should not happen if the options() function returned the correct // set of options. // - void module:: + void handler:: init (const name_values& options, log& log) { assert (!initialized_); @@ -193,18 +193,18 @@ namespace brep { name_values opts (expand_options (options)); - // Read module implementation configuration. + // Read handler implementation configuration. // init (opts); - // Read brep::module configuration. + // Read brep::handler configuration. // static option_descriptions od ( - convert (options::module::description ())); + convert (options::handler::description ())); name_values mo (filter (opts, od)); name_value_scanner s (mo); - options::module o (s, cli::unknown_mode::fail, cli::unknown_mode::fail); + options::handler o (s, cli::unknown_mode::fail, cli::unknown_mode::fail); verb_ = o.verbosity (); initialized_ = true; @@ -222,21 +222,21 @@ namespace brep } } - void module:: + void handler:: init (const name_values& options) { name_value_scanner s (options); init (s); - assert (!s.more ()); // Module didn't handle its options. + assert (!s.more ()); // Handler didn't handle its options. } - module:: - module (): log_writer_ (bind (&module::log_write, this, _1)) {} + handler:: + handler (): log_writer_ (bind (&handler::log_write, this, _1)) {} // Custom copy constructor is required to initialize log_writer_ properly. 
// - module:: - module (const module& m): module () + handler:: + handler (const handler& m): handler () { verb_ = m.verb_; initialized_ = m.initialized_; @@ -250,7 +250,7 @@ namespace brep // virtual std::string (* (* brep::search::func(std::string (* (*)(char))(int) // ,std::string (* (*)(wchar_t))(int)) const)(int, int))(int) // - string module:: + string handler:: func_name (const char* pretty_name) { const char* e (strchr (pretty_name, ')')); @@ -293,10 +293,10 @@ namespace brep } } - throw invalid_argument ("::brep::module::func_name"); + throw invalid_argument ("::brep::handler::func_name"); } - void module:: + void handler:: log_write (const diag_data& d) const { if (log_ == nullptr) @@ -313,7 +313,7 @@ namespace brep // // Use APLOG_INFO (as opposed to APLOG_TRACE1) as a mapping for // severity::trace. "LogLevel trace1" configuration directive switches - // on the avalanche of log messages from various modules. Would be good + // on the avalanche of log messages from various handlers. Would be good // to avoid wading through them. 
// static int s[] = {APLOG_ERR, APLOG_WARNING, APLOG_INFO, APLOG_INFO}; @@ -341,16 +341,16 @@ namespace brep } } - void module:: + void handler:: version (log& l) { log_ = &l; version (); } - // module::name_value_scanner + // handler::name_value_scanner // - module::name_value_scanner:: + handler::name_value_scanner:: name_value_scanner (const name_values& nv) noexcept : name_values_ (nv), i_ (nv.begin ()), @@ -358,13 +358,13 @@ namespace brep { } - bool module::name_value_scanner:: + bool handler::name_value_scanner:: more () { return i_ != name_values_.end (); } - const char* module::name_value_scanner:: + const char* handler::name_value_scanner:: peek () { if (i_ != name_values_.end ()) @@ -373,7 +373,7 @@ namespace brep throw cli::eos_reached (); } - const char* module::name_value_scanner:: + const char* handler::name_value_scanner:: next () { if (i_ != name_values_.end ()) @@ -386,7 +386,7 @@ namespace brep throw cli::eos_reached (); } - void module::name_value_scanner:: + void handler::name_value_scanner:: skip () { if (i_ != name_values_.end ()) diff --git a/mod/module.hxx b/mod/module.hxx index f549892..127cdab 100644 --- a/mod/module.hxx +++ b/mod/module.hxx @@ -19,7 +19,7 @@ namespace brep // // @@ Maybe doing using namespace is the right way to handle this. // There will, however, most likely be a conflict between - // web::module and our module. Or maybe not, need to try. + // web::handler and our handler. Or maybe not, need to try. // using web::status_code; using web::invalid_request; @@ -33,7 +33,7 @@ namespace brep // This exception indicated a server error (5XX). In particular, // it is thrown by the fail diagnostics stream and is caught by the - // module implementation where it is both logged as an error and + // handler implementation where it is both logged as an error and // returned to the user with the 5XX status code. 
// struct server_error @@ -43,15 +43,15 @@ namespace brep server_error (diag_data&& d): data (move (d)) {} }; - // Every module member function that needs to produce any diagnostics + // Every handler member function that needs to produce any diagnostics // shall begin with: // - // MODULE_DIAG; + // HANDLER_DIAG; // // This will instantiate the fail, error, warn, info, and trace // diagnostics streams with the function's name. // -#define MODULE_DIAG \ +#define HANDLER_DIAG \ const fail_mark fail (__PRETTY_FUNCTION__); \ const basic_mark error (severity::error, \ this->log_writer_, \ @@ -66,9 +66,9 @@ namespace brep this->log_writer_, \ __PRETTY_FUNCTION__) - // Adaptation of the web::module to our needs. + // Adaptation of the web::handler to our needs. // - class module: public web::module + class handler: public web::handler { // Diagnostics. // @@ -87,15 +87,15 @@ namespace brep template void l1 (const F& f) const {if (verb_ >= 1) f ();} template void l2 (const F& f) const {if (verb_ >= 2) f ();} - // Set to true when the module is successfully initialized. + // Set to true when the handler is successfully initialized. // bool initialized_ {false}; // Implementation details. // protected: - module (); - module (const module& ); + handler (); + handler (const handler& ); static name_values filter (const name_values&, const option_descriptions&); @@ -109,7 +109,7 @@ namespace brep static void append (option_descriptions& dst, const option_descriptions& src); - // Can be used by module implementation to parse HTTP request parameters. + // Can be used by handler implementation to parse HTTP request parameters. // class name_value_scanner: public cli::scanner { @@ -142,7 +142,7 @@ namespace brep init (cli::scanner&) = 0; // Can be overriden by custom request dispatcher to initialize - // sub-modules. + // sub-handlers. 
// virtual void init (const name_values&); @@ -156,12 +156,12 @@ namespace brep virtual bool handle (request&, response&, log&); - // web::module interface. + // web::handler interface. // public: // Custom request dispatcher can aggregate its own option descriptions - // with sub-modules option descriptions. In this case it should still call - // the base implementation in order to include the brep::module's options. + // with sub-handlers option descriptions. In this case it should still call + // the base implementation in order to include the brep::handler's options. // virtual option_descriptions options (); @@ -170,7 +170,7 @@ namespace brep virtual void version (log&); - // Can be overriden by the module implementation to log version, etc. + // Can be overriden by the handler implementation to log version, etc. // virtual void version () {} diff --git a/mod/options.cli b/mod/options.cli index e6b0840..97453a7 100644 --- a/mod/options.cli +++ b/mod/options.cli @@ -10,13 +10,13 @@ include ; namespace brep { - // Web module configuration options. + // Web handler configuration options. // namespace options { // Option groups. // - class module + class handler { string email { @@ -307,9 +307,9 @@ namespace brep } }; - // Module options. + // Handler options. 
// - class package_search: search, package_db, page, module + class package_search: search, package_db, page, handler { string search-title = "Packages" { @@ -319,22 +319,22 @@ namespace brep } }; - class package_details: package, search, package_db, page, module + class package_details: package, search, package_db, page, handler { }; class package_version_details: package, package_db, build, build_db, page, - module + handler { }; - class repository_details: package_db, page, module + class repository_details: package_db, page, handler { }; - class build_task: build, package_db, build_db, module + class build_task: build, package_db, build_db, handler { size_t build-task-request-max-size = 102400 { @@ -353,7 +353,7 @@ namespace brep } }; - class build_result: build, package_db, build_db, module + class build_result: build, package_db, build_db, handler { size_t build-result-request-max-size = 10240000 { @@ -365,15 +365,15 @@ namespace brep } }; - class build_log: build, package_db, build_db, module + class build_log: build, package_db, build_db, handler { }; - class build_force: build, package_db, build_db, module + class build_force: build, package_db, build_db, handler { }; - class builds: build, package_db, build_db, page, module + class builds: build, package_db, build_db, page, handler { uint16_t build-configurations = 10 { @@ -388,7 +388,7 @@ namespace brep } }; - class submit: page, module + class submit: page, handler { dir_path submit-data { @@ -411,8 +411,8 @@ namespace brep if the package submission functionality is enabled. Note that this directory must be on the same filesystem and satisfy - the same requirements as \cb{submit-data}. Its contents are - automatically cleaned up on each web server startup." + the same requirements as \cb{submit-data}. It is also the user's + responsibility to clean it up after an unclean web server shutdown." 
} size_t submit-max-size = 10485760 @@ -460,12 +460,12 @@ namespace brep } }; - class repository_root: module + class repository_root: handler { }; } - // Web module HTTP request parameters. + // Web handler HTTP request parameters. // namespace params { diff --git a/mod/page.cxx b/mod/page.cxx index 94e4f4f..614bf79 100644 --- a/mod/page.cxx +++ b/mod/page.cxx @@ -25,6 +25,10 @@ using namespace xml; using namespace web; using namespace web::xhtml; +// Note that in HTML5 the boolean attribute absence represents false value, +// true otherwise. If it is present then the value must be empty or +// case-insensitively match the attribute's name. +// namespace brep { // CSS_LINKS @@ -90,8 +94,7 @@ namespace brep << TBODY << TR << TD(ID="search-txt") - << *INPUT(TYPE="search", NAME="q", VALUE=query_, - AUTOFOCUS="autofocus") + << *INPUT(TYPE="search", NAME="q", VALUE=query_, AUTOFOCUS="") << ~TD << TD(ID="search-btn") << *INPUT(TYPE="submit", VALUE="Search") @@ -141,7 +144,7 @@ namespace brep s << PLACEHOLDER(placeholder_); if (autofocus_) - s << AUTOFOCUS("autofocus"); + s << AUTOFOCUS(""); s << ~INPUT << ~TD diff --git a/repositories.manifest b/repositories.manifest index 538ae37..723cb95 100644 --- a/repositories.manifest +++ b/repositories.manifest @@ -19,6 +19,10 @@ location: https://git.build2.org/packaging/apr/libapr1.git##HEAD : role: prerequisite +location: https://git.build2.org/packaging/apreq/libapreq2.git##HEAD + +: +role: prerequisite location: https://git.codesynthesis.com/odb/libodb.git##HEAD : diff --git a/web/apache/request.cxx b/web/apache/request.cxx index 32a1737..a019183 100644 --- a/web/apache/request.cxx +++ b/web/apache/request.cxx @@ -4,13 +4,20 @@ #include -#include // apr_table_*, apr_array_header_t +#include // APR_SIZE_MAX +#include // apr_status_t, APR_SUCCESS, APR_E*, apr_strerror() +#include // apr_table_*, apr_table_*(), apr_array_header_t #include // apr_pstrdup() +#include // apr_bucket*, apr_bucket_*(), apr_brigade_*(), + // 
APR_BRIGADE_*() #include // request_rec, HTTP_*, OK #include // ap_*() -#include // strcasecmp(), strncasecmp() +#include // APREQ_* +#include // apreq_brigade_copy() +#include // apreq_param_t, apreq_value_to_param() +#include // apreq_parser_t, apreq_parser_make() #include // strftime(), time_t #include @@ -22,11 +29,13 @@ #include #include // str*(), memcpy(), size_t #include // move() -#include // invalid_argument +#include // istreambuf_iterator +#include // invalid_argument, runtime_error #include // current_exception() #include #include // min() +#include // casecmp() #include #include @@ -39,6 +48,13 @@ namespace web { namespace apache { + [[noreturn]] static void + throw_internal_error (apr_status_t s, const string& what) + { + char buf[1024]; + throw runtime_error (what + ": " + apr_strerror (s, buf, sizeof (buf))); + } + // Extend the Apache stream with checking for the read limit and caching // the content if requested. Replay the cached content after rewind. // @@ -160,7 +176,7 @@ namespace web // mode_ = mode::cache; - // Bailout if the end of stream is reached. + // Bail out if the end of stream is reached. // if (eof_) return traits_type::eof (); @@ -230,6 +246,152 @@ namespace web return r; } + // Stream interface for reading from the Apache's bucket brigade. Put back + // is not supported. + // + // Note that reading from a brigade bucket modifies the brigade in the + // general case. For example, reading from a file bucket adds a new heap + // bucket before the file bucket on every read. Traversing/reading through + // such a bucket brigade effectively loads the whole file into the memory, + // so the subsequent brigade traversal results in iterating over the + // loaded heap buckets. + // + // To avoid such a behavior we will make a shallow copy of the original + // bucket brigade, initially and for each rewind. Then, instead of + // iterating, we will always read from the first bucket removing it after + // the use. 
+ // + class istreambuf_buckets: public streambuf + { + public: + // The bucket brigade must exist during the object's lifetime. + // + explicit + istreambuf_buckets (const apr_bucket_brigade* bs) + : orig_buckets_ (bs), + buckets_ (apr_brigade_create (bs->p, bs->bucket_alloc)) + + { + if (buckets_ == nullptr) + throw_internal_error (APR_ENOMEM, "apr_brigade_create"); + + rewind (); // Copy the original buckets. + } + + void + rewind () + { + // Note that apreq_brigade_copy() appends buckets to the destination, + // so we clean it up first. + // + apr_status_t r (apr_brigade_cleanup (buckets_.get ())); + if (r != APR_SUCCESS) + throw_internal_error (r, "apr_brigade_cleanup"); + + r = apreq_brigade_copy ( + buckets_.get (), + const_cast (orig_buckets_)); + + if (r != APR_SUCCESS) + throw_internal_error (r, "apreq_brigade_copy"); + + setg (nullptr, nullptr, nullptr); + } + + private: + virtual int_type + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + // If the get-pointer is not NULL then it points to the data referred + // by the first brigade bucket. As we will bail out or rewrite such a + // pointer now there is no need for the bucket either, so we can + // safely delete it. + // + if (gptr () != nullptr) + { + assert (!APR_BRIGADE_EMPTY (buckets_)); + + // Note that apr_bucket_delete() is a macro and the following + // call ends up badly (with SIGSEGV). 
+ // + // apr_bucket_delete (APR_BRIGADE_FIRST (buckets_)); + // + apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); + apr_bucket_delete (b); + } + + if (APR_BRIGADE_EMPTY (buckets_)) + return traits_type::eof (); + + apr_size_t n; + const char* d; + apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); + apr_status_t r (apr_bucket_read (b, &d, &n, APR_BLOCK_READ)); + + if (r != APR_SUCCESS) + throw_internal_error (r, "apr_bucket_read"); + + char* p (const_cast (d)); + setg (p, p, p + n); + return traits_type::to_int_type (*gptr ()); + } + + private: + const apr_bucket_brigade* orig_buckets_; + + struct brigade_deleter + { + void operator() (apr_bucket_brigade* p) const + { + if (p != nullptr) + { + apr_status_t r (apr_brigade_destroy (p)); + + // Shouldn't fail unless something is severely damaged. + // + assert (r == APR_SUCCESS); + } + } + }; + + unique_ptr buckets_; + }; + + class istream_buckets_base + { + public: + explicit + istream_buckets_base (const apr_bucket_brigade* bs): buf_ (bs) {} + + protected: + istreambuf_buckets buf_; + }; + + class istream_buckets: public istream_buckets_base, public istream + { + public: + explicit + istream_buckets (const apr_bucket_brigade* bs) + // Note that calling dtor for istream object before init() is called + // is undefined behavior. That's the reason for inventing the + // istream_buckets_base class. + // + : istream_buckets_base (bs), istream (&buf_) + { + exceptions (failbit | badbit); + } + + void + rewind () + { + buf_.rewind (); + clear (); // Clears *bit flags (in particular eofbit). + } + }; + // request // request:: @@ -305,17 +467,27 @@ namespace web ap_set_content_type (rec_, nullptr); // Unset the output content type. - if (in_ != nullptr) - rewind_istream (); - } + // We don't need to rewind the input stream (which well may fail if + // unbuffered) if the form data is already read. 
+ // + if (in_ != nullptr && form_data_ == nullptr) + { + assert (in_buf_ != nullptr); - void request:: - rewind_istream () - { - assert (in_buf_ != nullptr && in_ != nullptr); + in_buf_->rewind (); // Throws if impossible to rewind. + in_->clear (); // Clears *bit flags (in particular eofbit). + } - in_buf_->rewind (); // Throws if impossible to rewind. - in_->clear (); // Clears *bit flags (in particular eofbit). + // Rewind uploaded file streams. + // + if (uploads_ != nullptr) + { + for (const unique_ptr& is: *uploads_) + { + if (is != nullptr) + is->rewind (); + } + } } istream& request:: @@ -332,11 +504,6 @@ namespace web in_.reset (new istream (in_buf.get ())); in_buf_ = move (in_buf); in_->exceptions (istream::failbit | istream::badbit); - - // Save form data now otherwise will not be available to do later when - // data is already read from stream. - // - form_data (); } else { @@ -363,26 +530,309 @@ namespace web } const name_values& request:: - parameters () + parameters (size_t limit, bool url_only) { - if (parameters_ == nullptr) + if (parameters_ == nullptr || url_only < url_only_parameters_) { - parameters_.reset (new name_values ()); - try { - parse_parameters (rec_->args); - parse_parameters (form_data ().c_str ()); + if (parameters_ == nullptr) + { + parameters_.reset (new name_values ()); + parse_url_parameters (rec_->args); + } + + if (!url_only && form_data (limit)) + { + // After the form data is parsed we can clean it up for the + // application/x-www-form-urlencoded encoding but not for the + // multipart/form-data (see parse_multipart_parameters() for + // details). + // + if (form_multipart_) + parse_multipart_parameters (*form_data_); + else + { + // Make the character vector a NULL-terminated string. + // + form_data_->push_back ('\0'); + + parse_url_parameters (form_data_->data ()); + *form_data_ = vector (); // Reset the cache. 
+ } + } } - catch (const invalid_argument& ) + catch (const invalid_argument&) { throw invalid_request (); } + + url_only_parameters_ = url_only; } return *parameters_; } + bool request:: + form_data (size_t limit) + { + if (form_data_ == nullptr) + { + form_data_.reset (new vector ()); + + // We will not consider POST body as a form data if the request is in + // the reading or later state. + // + if (rec_->method_number == M_POST && state_ < request_state::reading) + { + const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); + + if (ct != nullptr) + { + form_multipart_ = casecmp ("multipart/form-data", ct, 19) == 0; + + if (form_multipart_ || + casecmp ("application/x-www-form-urlencoded", ct, 33) == 0) + *form_data_ = vector ( + istreambuf_iterator (content (limit)), + istreambuf_iterator ()); + } + } + } + + return !form_data_->empty (); + } + + void request:: + parse_url_parameters (const char* args) + { + assert (parameters_ != nullptr); + + for (auto n (args); n != nullptr; ) + { + const char* v (strchr (n, '=')); + const char* e (strchr (n, '&')); + + if (e != nullptr && e < v) + v = nullptr; + + string name (v != nullptr + ? mime_url_decode (n, v) : + (e + ? mime_url_decode (n, e) + : mime_url_decode (n, n + strlen (n)))); + + optional value; + + if (v++) + value = e + ? mime_url_decode (v, e) + : mime_url_decode (v, v + strlen (v)); + + if (!name.empty () || value) + parameters_->emplace_back (move (name), move (value)); + + n = e ? e + 1 : nullptr; + } + } + + void request:: + parse_multipart_parameters (const vector& body) + { + assert (parameters_ != nullptr && uploads_ == nullptr); + + auto throw_bad_request = [] (apr_status_t s, + status_code sc = HTTP_BAD_REQUEST) + { + char buf[1024]; + throw invalid_request (sc, apr_strerror (s, buf, sizeof (buf))); + }; + + // Create the file upload stream list, filling it with NULLs for the + // parameters parsed from the URL query part. 
+ // + uploads_.reset ( + new vector> (parameters_->size ())); + + // All the required objects (parser, input/output buckets, etc.) will be + // allocated in the request memory pool and so will have the HTTP + // request duration lifetime. + // + apr_pool_t* pool (rec_->pool); + + // Create the input bucket brigade containing a single bucket that + // references the form data. + // + apr_bucket_alloc_t* ba (apr_bucket_alloc_create (pool)); + if (ba == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_alloc_create"); + + apr_bucket_brigade* bb (apr_brigade_create (pool, ba)); + if (bb == nullptr) + throw_internal_error (APR_ENOMEM, "apr_brigade_create"); + + apr_bucket* b ( + apr_bucket_immortal_create (body.data (), body.size (), ba)); + + if (b == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_immortal_create"); + + APR_BRIGADE_INSERT_TAIL (bb, b); + + if ((b = apr_bucket_eos_create (ba)) == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_eos_create"); + + APR_BRIGADE_INSERT_TAIL (bb, b); + + // Make sure that the parser will not swap the parsed data to disk + // passing the maximum possible value for the brigade limit. This way + // the resulting buckets will reference the form data directly, making + // no copies. This is why we should not reset the form data cache after + // the parsing. + // + // Note that in future we may possibly setup the parser to read from the + // Apache internals directly and enable swapping the data to disk to + // minimize memory consumption. + // + apreq_parser_t* parser ( + apreq_parser_make (pool, + ba, + apr_table_get (rec_->headers_in, "Content-Type"), + apreq_parse_multipart, + APR_SIZE_MAX /* brigade_limit */, + nullptr /* temp_dir */, + nullptr /* hook */, + nullptr /* ctx */)); + + if (parser == nullptr) + throw_internal_error (APR_ENOMEM, "apreq_parser_make"); + + // Create the output table that will be filled with the parsed + // parameters. 
+ // + apr_table_t* params (apr_table_make (pool, APREQ_DEFAULT_NELTS)); + if (params == nullptr) + throw_internal_error (APR_ENOMEM, "apr_table_make"); + + // Parse the form data. + // + apr_status_t r (apreq_parser_run (parser, params, bb)); + if (r != APR_SUCCESS) + throw_bad_request (r); + + // Fill the parameter and file upload stream lists. + // + const apr_array_header_t* ps (apr_table_elts (params)); + size_t n (ps->nelts); + + for (auto p (reinterpret_cast (ps->elts)); + n--; ++p) + { + assert (p->key != nullptr && p->val != nullptr); + + if (*p->key != '\0') + { + parameters_->emplace_back (p->key, optional (p->val)); + + const apreq_param_t* ap (apreq_value_to_param (p->val)); + assert (ap != nullptr); // Must always be resolvable. + + uploads_->emplace_back (ap->upload != nullptr + ? new istream_buckets (ap->upload) + : nullptr); + } + } + } + + request::uploads_type& request:: + uploads () const + { + if (parameters_ == nullptr || url_only_parameters_) + sequence_error ("web::apache::request::uploads"); + + if (uploads_ == nullptr) + throw invalid_argument ("no uploads"); + + assert (uploads_->size () == parameters_->size ()); + return *uploads_; + } + + istream& request:: + open_upload (size_t index) + { + uploads_type& us (uploads ()); + size_t n (us.size ()); + + if (index >= n) + throw invalid_argument ("invalid index"); + + const unique_ptr& is (us[index]); + + if (is == nullptr) + throw invalid_argument ("no upload"); + + return *is; + } + + istream& request:: + open_upload (const string& name) + { + uploads_type& us (uploads ()); + size_t n (us.size ()); + + istream* r (nullptr); + for (size_t i (0); i < n; ++i) + { + if ((*parameters_)[i].name == name) + { + istream* is (us[i].get ()); + + if (is != nullptr) + { + if (r != nullptr) + throw invalid_argument ("multiple uploads for '" + name + "'"); + + r = is; + } + } + } + + if (r == nullptr) + throw invalid_argument ("no upload"); + + return *r; + } + + const name_values& request:: + headers () 
+ { + if (headers_ == nullptr) + { + headers_.reset (new name_values ()); + + const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); + size_t n (ha->nelts); + + headers_->reserve (n + 1); // One for the custom :Client-IP header. + + auto add = [this] (const char* n, const char* v) + { + assert (n != nullptr && v != nullptr); + headers_->emplace_back (n, optional (v)); + }; + + for (auto h (reinterpret_cast (ha->elts)); + n--; ++h) + add (h->key, h->val); + + assert (rec_->connection != nullptr); + + add (":Client-IP", rec_->connection->client_ip); + } + + return *headers_; + } + const name_values& request:: cookies () { @@ -393,10 +843,12 @@ namespace web const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); size_t n (ha->nelts); - for (auto h (reinterpret_cast (ha->elts)); + for (auto h (reinterpret_cast (ha->elts)); n--; ++h) { - if (strcasecmp (h->key, "Cookie") == 0) + assert (h->key != nullptr); + + if (casecmp (h->key, "Cookie") == 0) { for (const char* n (h->val); n != nullptr; ) { @@ -447,8 +899,7 @@ namespace web // Same content type. // - strcasecmp (rec_->content_type ? rec_->content_type : "", - type.c_str ()) == 0) + casecmp (type, rec_->content_type ? rec_->content_type : "") == 0) { // No change, return the existing stream. // @@ -463,7 +914,10 @@ namespace web // written. Save form data now to make it available for future // parameters() call. // - form_data (); + // In the rare cases when the form data is expectedly bigger than 64K + // the client can always call parameters(limit) explicitly. 
+ // + form_data (64 * 1024); unique_ptr out_buf ( buffer @@ -548,83 +1002,5 @@ namespace web state (request_state::headers); apr_table_add (rec_->err_headers_out, "Set-Cookie", s.c_str ()); } - - void request:: - parse_parameters (const char* args) - { - for (auto n (args); n != nullptr; ) - { - const char* v (strchr (n, '=')); - const char* e (strchr (n, '&')); - - if (e != nullptr && e < v) - v = nullptr; - - string name (v != nullptr - ? mime_url_decode (n, v) : - (e - ? mime_url_decode (n, e) - : mime_url_decode (n, n + strlen (n)))); - - optional value; - - if (v++) - value = e - ? mime_url_decode (v, e) - : mime_url_decode (v, v + strlen (v)); - - if (!name.empty () || value) - parameters_->emplace_back (move (name), move (value)); - - n = e ? e + 1 : nullptr; - } - } - - const string& request:: - form_data () - { - if (!form_data_) - { - form_data_.reset (new string ()); - - if (rec_->method_number == M_POST) - { - const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); - - if (ct != nullptr && - strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0) - { - size_t limit (0); - bool rewind (true); - - // Assign some reasonable (64K) input content read/cache limits if - // not done explicitly yet (with the request::content() call). - // Rewind afterwards unless the cache limit is set to zero. - // - if (in_buf_ == nullptr) - limit = 64 * 1024; - else - rewind = in_buf_->cache_limit () > 0; - - istream& istr (content (limit, limit)); - - // Do not throw when eofbit is set (end of stream reached), and - // when failbit is set (getline() failed to extract any character). - // - istream::iostate e (istr.exceptions ()); // Save exception mask. - istr.exceptions (istream::badbit); - getline (istr, *form_data_); - istr.exceptions (e); // Restore exception mask. - - // Rewind the stream unless no buffering were requested beforehand. 
- // - if (rewind) - rewind_istream (); - } - } - } - - return *form_data_; - } } } diff --git a/web/apache/request.hxx b/web/apache/request.hxx index ba815dc..8c42f57 100644 --- a/web/apache/request.hxx +++ b/web/apache/request.hxx @@ -10,6 +10,7 @@ #include #include // unique_ptr #include +#include #include #include #include @@ -55,6 +56,10 @@ namespace web // class istreambuf_cache; + // Stream type for reading from Apache's bucket brigades. + // + class istream_buckets; + class request: public web::request, public web::response, public stream_state @@ -93,12 +98,25 @@ namespace web // Get request body data stream. // virtual std::istream& - content (size_t limit = 0, size_t buffer = 0); + content (std::size_t limit = 0, std::size_t buffer = 0); // Get request parameters. // virtual const name_values& - parameters (); + parameters (std::size_t limit, bool url_only = false); + + // Get upload stream. + // + virtual std::istream& + open_upload (std::size_t index); + + virtual std::istream& + open_upload (const std::string& name); + + // Get request headers. + // + virtual const name_values& + headers (); // Get request cookies. // @@ -134,16 +152,35 @@ namespace web bool buffer = true); private: - // Get application/x-www-form-urlencoded form data. If request::content() - // was not called yet (and so limits are not specified) then set both of - // them to 64KB. Rewind the stream afterwards, so it's available for the - // application as well, unless no buffering were requested beforehand. + // On the first call cache the application/x-www-form-urlencoded or + // multipart/form-data form data for the subsequent parameters parsing + // and set the multipart flag accordingly. Don't cache if the request is + // in the reading or later state. Return true if the cache contains the + // form data. + // + // Note that the function doesn't change the content buffering (see + // content() function for details) nor rewind the content stream after + // reading. 
+ // + bool + form_data (std::size_t limit); + + // Used to also parse application/x-www-form-urlencoded POST body. // - const std::string& - form_data (); + void + parse_url_parameters (const char* args); void - parse_parameters (const char* args); + parse_multipart_parameters (const std::vector& body); + + // Return a list of the upload input streams. Throw sequence_error if + // the parameters() function was not called yet. Throw invalid_argument + // if the request doesn't contain multipart form data. + // + using uploads_type = std::vector>; + + uploads_type& + uploads () const; // Advance the request processing state. Noop if new state is equal to // the current one. Throw sequence_error if the new state is less then @@ -161,20 +198,27 @@ namespace web virtual void set_write_state () {state (request_state::writing);} - // Rewind the input stream (that must exist). Throw sequence_error if - // some unbuffered content have already been read. - // - void - rewind_istream (); - private: request_rec* rec_; request_state state_ = request_state::initial; path_type path_; + std::unique_ptr parameters_; + bool url_only_parameters_; // Meaningless if parameters_ is NULL; + + // Uploaded file streams. If not NULL, is parallel to the parameters + // list. + // + std::unique_ptr uploads_; + + std::unique_ptr headers_; std::unique_ptr cookies_; - std::unique_ptr form_data_; + + // Form data cache. Is empty if the body doesn't contain the form data. + // + std::unique_ptr> form_data_; + bool form_multipart_; // Meaningless if form_data_ is NULL or empty; std::unique_ptr in_buf_; std::unique_ptr in_; diff --git a/web/apache/service.cxx b/web/apache/service.cxx index b72aa3f..bad98cc 100644 --- a/web/apache/service.cxx +++ b/web/apache/service.cxx @@ -69,8 +69,8 @@ namespace web }; } - // Track if the module is allowed to handle a request in the specific - // configuration scope. 
The module exemplar will be created (and + // Track if the handler is allowed to handle a request in the specific + // configuration scope. The handler exemplar will be created (and // initialized) only for configuration contexts that have // 'SetHandler ' in effect for the corresponding scope. // diff --git a/web/apache/service.hxx b/web/apache/service.hxx index fca0ea2..2fbcd0a 100644 --- a/web/apache/service.hxx +++ b/web/apache/service.hxx @@ -31,22 +31,22 @@ namespace web // configuration context to the request handler. // // This Apache service implementation first makes a copy of the provided - // (in the constructor below) module exemplar for each directory context. + // (in the constructor below) handler exemplar for each directory context. // It then initializes each of these "context exemplars" with the (merged) // set of configuration options. Finally, when handling a request, it // copies the corresponding "context exemplar" to create the "handling // instance". Note that the "context exemplars" are created as a copy of // the provided exemplar, which is never initialized. As a result, it is - // possible to detect if the module's copy constructor is used to create a - // "context exemplar" or a "handling instance". + // possible to detect if the handler's copy constructor is used to create + // a "context exemplar" or a "handling instance". // class service: ::module { public: // Note that the module exemplar is stored by-reference. // - template - service (const std::string& name, M& exemplar) + template + service (const std::string& name, H& exemplar) : ::module { STANDARD20_MODULE_STUFF, @@ -55,7 +55,7 @@ namespace web nullptr, nullptr, nullptr, - ®ister_hooks + ®ister_hooks #ifdef AP_MODULE_HAS_FLAGS , AP_MODULE_FLAG_NONE @@ -69,15 +69,15 @@ namespace web // Set configuration context management hooks. // // The overall process of building the configuration hierarchy for a - // module is as follows: + // handler is as follows: // // 1. 
Apache creates directory and server configuration contexts for - // scopes containing module-defined directives by calling the + // scopes containing handler-defined directives by calling the // create_{server,dir}_context() callback functions. For directives // at the server scope the special directory context is created as // well. // - // 2. Apache calls parse_option() function for each module-defined + // 2. Apache calls parse_option() function for each handler-defined // directive. The function parses the directives and places the // resulting options into the corresponding configuration context. // It also establishes the directory-server contexts relations. @@ -89,7 +89,7 @@ namespace web // 4. Apache calls config_finalizer() which complements the directory // contexts options with the ones from the enclosing servers. // - // 5. Apache calls worker_initializer() which creates module exemplar + // 5. Apache calls worker_initializer() which creates handler exemplar // for each directory configuration context that have // 'SetHandler ' directive in effect for it. // @@ -100,14 +100,14 @@ namespace web // create_server_config = &create_server_context; create_dir_config = &create_dir_context; - merge_server_config = &merge_server_context; + merge_server_config = &merge_server_context; - // instance () is invented to delegate processing from apache + // instance () is invented to delegate processing from apache // request handler C function to the service non static member // function. This appoach resticts number of service objects per - // specific module implementation class with just one instance. + // specific handler implementation class with just one instance. 
// - service*& srv (instance ()); + service*& srv (instance ()); assert (srv == nullptr); srv = this; } @@ -118,7 +118,7 @@ namespace web } private: - template + template static service*& instance () noexcept { @@ -126,45 +126,45 @@ namespace web return instance; } - template + template static void register_hooks (apr_pool_t*) noexcept { // The config_finalizer() function is called at the end of Apache // server configuration parsing. // - ap_hook_post_config (&config_finalizer, NULL, NULL, APR_HOOK_LAST); + ap_hook_post_config (&config_finalizer, NULL, NULL, APR_HOOK_LAST); // The worker_initializer() function is called right after Apache // worker process is started. Called for every new process spawned. // ap_hook_child_init ( - &worker_initializer, NULL, NULL, APR_HOOK_LAST); + &worker_initializer, NULL, NULL, APR_HOOK_LAST); // The request_handler () function is called for each client request. // - ap_hook_handler (&request_handler, NULL, NULL, APR_HOOK_LAST); + ap_hook_handler (&request_handler, NULL, NULL, APR_HOOK_LAST); } - template + template static int config_finalizer (apr_pool_t*, apr_pool_t*, apr_pool_t*, server_rec* s) noexcept { - instance ()->finalize_config (s); + instance ()->finalize_config (s); return OK; } - template + template static void worker_initializer (apr_pool_t*, server_rec* s) noexcept { - auto srv (instance ()); + auto srv (instance ()); log l (s, srv); - srv->template init_worker (l); + srv->template init_worker (l); } - template + template static int request_handler (request_rec* r) noexcept; @@ -176,12 +176,12 @@ namespace web enum class request_handling { // Configuration scope has 'SetHandler ' directive - // specified. The module is allowed to handle a request in the scope. + // specified. The handler is allowed to handle a request in the scope. // allowed, // Configuration scope has 'SetHandler |None' - // directive specified. The module is disallowed to handle a request + // directive specified. 
The handler is disallowed to handle a request // in the scope. // disallowed, @@ -207,7 +207,7 @@ namespace web // // We will then use the pointers to these context objects as keys in // maps to (1) the corresponding application-level option lists during - // the configuration cycle and to (2) the corresponding module exemplar + // the configuration cycle and to (2) the corresponding handler exemplar // during the HTTP request handling phase. We will also use the same // type for both directory and server configuration contexts. // @@ -267,12 +267,12 @@ namespace web static void* create_dir_context (apr_pool_t*, char* dir) noexcept; - template + template static void* merge_server_context (apr_pool_t*, void* enclosing, void* enclosed) noexcept { - instance ()->complement ( + instance ()->complement ( context_cast (enclosed), context_cast (enclosing)); return enclosed; @@ -298,17 +298,17 @@ namespace web void complement (context* enclosed, context* enclosing); - template + template void init_worker (log&); - template + template int handle (request&, const context*, log&) const; private: std::string name_; - module& exemplar_; + handler& exemplar_; option_descriptions option_descriptions_; // The context objects pointed to by the key can change during the @@ -320,7 +320,7 @@ namespace web // The context objects pointed to by the key can not change during the // request handling phase. 
// - using exemplars = std::map>; + using exemplars = std::map>; exemplars exemplars_; bool options_parsed_ = false; diff --git a/web/apache/service.txx b/web/apache/service.txx index 36c6826..6b1baad 100644 --- a/web/apache/service.txx +++ b/web/apache/service.txx @@ -15,20 +15,22 @@ namespace web { namespace apache { - template + template void service:: init_worker (log& l) { - const std::string func_name ( + using namespace std; + + const string func_name ( "web::apache::service<" + name_ + ">::init_worker"); try { - const M* exemplar (dynamic_cast (&exemplar_)); + const H* exemplar (dynamic_cast (&exemplar_)); assert (exemplar != nullptr); - // For each directory configuration context, for which the module is - // allowed to handle a request, create the module exemplar as a deep + // For each directory configuration context, for which the handler is + // allowed to handle a request, create the handler exemplar as a deep // copy of the exemplar_ member, and initialize it with the // context-specific option list. // @@ -42,7 +44,7 @@ namespace web auto r ( exemplars_.emplace ( c, - std::unique_ptr (new M (*exemplar)))); + unique_ptr (new H (*exemplar)))); r.first->second->init (o.second, l); } @@ -52,7 +54,7 @@ namespace web // options_.clear (); } - catch (const std::exception& e) + catch (const exception& e) { l.write (nullptr, 0, func_name.c_str (), APLOG_EMERG, e.what ()); @@ -72,7 +74,7 @@ namespace web // create any new ones, it keeps trying to create the worker process // at one-second intervals. // - std::exit (APEXIT_CHILDSICK); + exit (APEXIT_CHILDSICK); } catch (...) { @@ -84,15 +86,15 @@ namespace web // Terminate the worker apache process. 
// - std::exit (APEXIT_CHILDSICK); + exit (APEXIT_CHILDSICK); } } - template + template int service:: request_handler (request_rec* r) noexcept { - auto srv (instance ()); + auto srv (instance ()); if (!r->handler || srv->name_ != r->handler) return DECLINED; assert (r->per_dir_config != nullptr); @@ -106,14 +108,16 @@ namespace web request rq (r); log lg (r->server, srv); - return srv->template handle (rq, cx, lg); + return srv->template handle (rq, cx, lg); } - template + template int service:: handle (request& rq, const context* cx, log& lg) const { - static const std::string func_name ( + using namespace std; + + static const string func_name ( "web::apache::service<" + name_ + ">::handle"); try @@ -121,14 +125,14 @@ namespace web auto i (exemplars_.find (cx)); assert (i != exemplars_.end ()); - const M* e (dynamic_cast (i->second.get ())); + const H* e (dynamic_cast (i->second.get ())); assert (e != nullptr); - for (M m (*e);;) + for (H h (*e);;) { try { - if (static_cast (m).handle (rq, rq, lg)) + if (static_cast (h).handle (rq, rq, lg)) return rq.flush (); if (rq.state () == request_state::initial) @@ -138,7 +142,7 @@ namespace web "handling declined being partially executed"); break; } - catch (const module::retry&) + catch (const handler::retry&) { // Retry to handle the request. 
// @@ -152,10 +156,10 @@ namespace web { try { - rq.content (e.status, e.type) << e.content; + rq.content (e.status, e.type) << e.content << endl; return rq.flush (); } - catch (const std::exception& e) + catch (const exception& e) { lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); } @@ -163,7 +167,7 @@ namespace web return e.status; } - catch (const std::exception& e) + catch (const exception& e) { lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); @@ -173,11 +177,11 @@ namespace web { rq.content ( HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") - << e; + << e << endl; return rq.flush (); } - catch (const std::exception& e) + catch (const exception& e) { lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); } @@ -193,11 +197,11 @@ namespace web { rq.content ( HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") - << "unknown error"; + << "unknown error" << endl; return rq.flush (); } - catch (const std::exception& e) + catch (const exception& e) { lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); } diff --git a/web/buildfile b/web/buildfile index 6eb5ee5..df5f812 100644 --- a/web/buildfile +++ b/web/buildfile @@ -8,6 +8,7 @@ # While we don't need to link to APR, we need to find its header location. # import libs = libapr1%lib{apr-1} +import libs += libapreq2%lib{apreq2} import libs += libstudxml%lib{studxml} import libs += libbutl%lib{butl} diff --git a/web/module.hxx b/web/module.hxx index de534fb..dd98c29 100644 --- a/web/module.hxx +++ b/web/module.hxx @@ -62,7 +62,7 @@ namespace web sequence_error (std::string d): std::runtime_error (std::move (d)) {} }; - // Map of module configuration option names to the boolean flag indicating + // Map of handler configuration option names to the boolean flag indicating // whether the value is expected for the option. 
// using option_descriptions = std::map; @@ -90,9 +90,9 @@ namespace web virtual ~request () = default; - // Corresponds to abs_path portion of HTTP URL as described in - // "3.2.2 HTTP URL" of http://tools.ietf.org/html/rfc2616. - // Returns '/' if no abs_path is present in URL. + // Corresponds to abs_path portion of HTTP URL as described in "3.2.2 HTTP + // URL" of http://tools.ietf.org/html/rfc2616. Returns '/' if no abs_path + // is present in URL. // virtual const path_type& path () = 0; @@ -102,10 +102,43 @@ namespace web // in name_values. //@@ Maybe parameter_list() and parameter_map()? // - // Throw invalid_request if decoding of any name or value fails. + // Parse parameters from the URL query part and from the HTTP POST request + // body for the application/x-www-form-urlencoded or multipart/form-data + // content type. Optionally limit the amount of data read from the body + // (see the content() function for the semantics). Throw invalid_request + // if parameter decoding fails. // virtual const name_values& - parameters () = 0; + parameters (std::size_t limit, bool url_only = false) = 0; + + // Open the input stream for the upload corresponding to the specified + // parameter index. Must be called after the parameters() function is + // called, throw sequence_error if that's not the case. Throw + // invalid_argument if the index doesn't have an upload (for example, + // because the parameter is not a form field). + // + // Note also that reopening the same upload (within the same retry) + // returns the same stream reference. + // + virtual std::istream& + open_upload (std::size_t index) = 0; + + // As above but specify the parameter by name. Throw invalid_argument if + // there are multiple uploads for this parameter name. + // + virtual std::istream& + open_upload (const std::string& name) = 0; + + // Request headers. + // + // The implementation may add custom pseudo-headers reflecting additional + // request options.
Such headers should start with ':'. If possible, the + // implementation should add the following well-known pseudo-headers: + // + // :Client-IP - IP address of the connecting client. + // + virtual const name_values& + headers () = 0; // Throw invalid_request if cookies are malformed. // @@ -126,7 +159,7 @@ namespace web // sequence_error exception being thrown. // virtual std::istream& - content (size_t limit = 0, size_t buffer = 0) = 0; + content (std::size_t limit, std::size_t buffer = 0) = 0; }; class response @@ -145,7 +178,7 @@ namespace web // and the status code is changed, then the old content is // discarded. If the content was not buffered and the status // is changed, then the sequence_error exception is thrown. - // If this exception leaves module::handle(), then the + // If this exception leaves handler::handle(), then the // implementation shall terminate the response in a suitable // but unspecified manner. In particular, there is no guarantee // that the user will be notified of an error or observe the @@ -176,11 +209,11 @@ namespace web bool buffer = true) = 0; }; - // A web server logging backend. The module can use it to log + // A web server logging backend. The handler can use it to log // diagnostics that is meant for the web server operator rather // than the user. // - // The module can cast this basic interface to the web server's + // The handler can cast this basic interface to the web server's // specific implementation that may provide a richer interface. // class log @@ -193,39 +226,39 @@ namespace web write (const char* msg) = 0; }; - // The web server creates a new module instance for each request - // by copy-initializing it with the module exemplar. This way we - // achieve two things: we can freely use module data members + // The web server creates a new handler instance for each request + // by copy-initializing it with the handler exemplar. 
This way we + // achieve two things: we can freely use handler data members // without worrying about multi-threading issues and we // automatically get started with the initial state for each // request. If you really need to share some rw-data between - // all the modules, use static data members with appropriate + // all the handlers, use static data members with appropriate // locking. See the header in one of the web server // directories (e.g., apache/) if you need to see the code that // does this. // - class module + class handler { public: virtual - ~module () = default; + ~handler () = default; - // Description of configuration options supported by this module. Note: + // Description of configuration options supported by this handler. Note: // should be callable during static initialization. // virtual option_descriptions options () = 0; - // During startup the web server calls this function on the module - // exemplar to log the module version information. It is up to the web - // server whether to call this function once per module implementation + // During startup the web server calls this function on the handler + // exemplar to log the handler version information. It is up to the web + // server whether to call this function once per handler implementation // type. Therefore, it is expected that this function will log the same - // information for all the module exemplars. + // information for all the handler exemplars. // virtual void version (log&) = 0; - // During startup the web server calls this function on the module + // During startup the web server calls this function on the handler // exemplar passing a list of configuration options. The place these // configuration options come from is implementation-specific (normally // a configuration file). The web server guarantees that only options @@ -242,7 +275,7 @@ namespace web // unspecified manner. // // Throw retry if need to retry handling the request. 
The retry will - // happen on the same instance of the module and the implementation is + // happen on the same instance of the handler and the implementation is // expected to "rewind" the request and response objects to their initial // state. This is only guaranteed to be possible if the relevant functions // in the request and response objects were called in buffered mode (the diff --git a/www/buildfile b/www/buildfile index eebac99..964851a 100644 --- a/www/buildfile +++ b/www/buildfile @@ -10,4 +10,8 @@ define scss: file scss{*}: extension = scss scss{*}: install = data/www/ -./: css{*} scss{*} +define xhtml: file +xhtml{*}: extension = xhtml +xhtml{*}: install = data/www/ + +./: css{*} scss{*} xhtml{*} diff --git a/www/submit-body.css b/www/submit-body.css new file mode 100644 index 0000000..0cb86d1 --- /dev/null +++ b/www/submit-body.css @@ -0,0 +1,22 @@ +/* + * Package archive submission form (based on proplist and form-table) + */ +#submit +{ + margin-top: .8em; + margin-bottom: .8em; + + padding-top: .4em; + padding-bottom: .4em; +} + +#submit th +{ + width: 5.7em; +} + +#submit input +{ + width: 100%; + margin:0; +} diff --git a/www/submit.css b/www/submit.css new file mode 100644 index 0000000..6bab4a4 --- /dev/null +++ b/www/submit.css @@ -0,0 +1,3 @@ +@import url(common.css); +@import url(brep-common.css); +@import url(submit-body.css); diff --git a/www/submit.scss b/www/submit.scss new file mode 100644 index 0000000..3a42ad6 --- /dev/null +++ b/www/submit.scss @@ -0,0 +1,3 @@ +@import "common"; +@import "brep-common"; +@import "submit-body"; diff --git a/www/submit.xhtml b/www/submit.xhtml new file mode 100644 index 0000000..f222b1f --- /dev/null +++ b/www/submit.xhtml @@ -0,0 +1,25 @@ + + +
+ + + + + + + + + + + +
archive + +
sha256
+
+
-- cgit v1.1