From 35359f038f571dc46de3d14af72a2bc911fb0a24 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 18 Mar 2020 22:17:49 +0300 Subject: Implement brep-monitor --- INSTALL | 72 ++- INSTALL-DEV | 4 +- clean/clean.cli | 4 +- clean/clean.cxx | 10 +- doc/buildfile | 9 +- doc/cli.sh | 2 +- doc/manual.cli | 3 +- etc/brep-module.conf | 5 + etc/systemd/brep-clean.timer | 4 +- etc/systemd/brep-monitor.service | 14 + etc/systemd/brep-monitor.timer | 23 + libbrep/build-package.hxx | 2 + libbrep/build.cxx | 21 + libbrep/build.hxx | 82 ++- libbrep/build.xml | 40 ++ libbrep/common.hxx | 30 ++ load/load.cli | 4 +- load/load.cxx | 2 +- migrate/migrate.cli | 4 +- migrate/migrate.cxx | 4 +- mod/.gitignore | 2 +- mod/build-config-module.cxx | 236 +------- mod/build-config-module.hxx | 40 +- mod/build-config.cxx | 249 +++++++++ mod/build-config.hxx | 45 ++ mod/build.cxx | 2 +- mod/buildfile | 31 +- mod/database-module.cxx | 2 +- mod/database-module.hxx | 2 +- mod/mod-build-configs.cxx | 7 +- mod/mod-build-configs.hxx | 2 +- mod/mod-build-force.cxx | 4 +- mod/mod-build-force.hxx | 2 +- mod/mod-build-log.cxx | 4 +- mod/mod-build-log.hxx | 2 +- mod/mod-build-result.cxx | 7 +- mod/mod-build-result.hxx | 2 +- mod/mod-build-task.cxx | 22 +- mod/mod-build-task.hxx | 2 +- mod/mod-builds.cxx | 37 +- mod/mod-builds.hxx | 2 +- mod/mod-ci.cxx | 11 +- mod/mod-ci.hxx | 4 +- mod/mod-package-details.cxx | 9 +- mod/mod-package-details.hxx | 2 +- mod/mod-package-version-details.cxx | 9 +- mod/mod-package-version-details.hxx | 2 +- mod/mod-packages.cxx | 9 +- mod/mod-packages.hxx | 2 +- mod/mod-repository-details.cxx | 9 +- mod/mod-repository-details.hxx | 2 +- mod/mod-repository-root.cxx | 4 +- mod/mod-repository-root.hxx | 2 +- mod/mod-submit.cxx | 7 +- mod/mod-submit.hxx | 4 +- mod/module.cli | 811 ++++++++++++++++++++++++++++ mod/module.cxx | 6 +- mod/module.hxx | 4 +- mod/options.cli | 811 ---------------------------- mod/page.cxx | 7 +- mod/page.hxx | 2 +- mod/services.cxx | 2 +- 
mod/types-parsers.cxx | 2 +- mod/types-parsers.hxx | 2 +- monitor/.gitignore | 2 + monitor/buildfile | 45 ++ monitor/module.cli | 16 + monitor/monitor.cli | 176 ++++++ monitor/monitor.cxx | 766 ++++++++++++++++++++++++++ tests/web/xhtml/buildfile | 4 +- tests/web/xhtml/driver.cxx | 2 +- web/.gitignore | 1 - web/apache/log.hxx | 80 --- web/apache/request.cxx | 1005 ----------------------------------- web/apache/request.hxx | 233 -------- web/apache/request.ixx | 45 -- web/apache/service.cxx | 268 ---------- web/apache/service.hxx | 333 ------------ web/apache/service.txx | 213 -------- web/apache/stream.hxx | 148 ------ web/buildfile | 18 - web/mime-url-encoding.cxx | 66 --- web/mime-url-encoding.hxx | 32 -- web/module.hxx | 299 ----------- web/server/apache/log.hxx | 80 +++ web/server/apache/request.cxx | 1005 +++++++++++++++++++++++++++++++++++ web/server/apache/request.hxx | 233 ++++++++ web/server/apache/request.ixx | 45 ++ web/server/apache/service.cxx | 268 ++++++++++ web/server/apache/service.hxx | 333 ++++++++++++ web/server/apache/service.txx | 213 ++++++++ web/server/apache/stream.hxx | 148 ++++++ web/server/buildfile | 15 + web/server/mime-url-encoding.cxx | 66 +++ web/server/mime-url-encoding.hxx | 32 ++ web/server/module.hxx | 299 +++++++++++ web/version.hxx.in | 11 - web/xhtml-fragment.cxx | 143 ----- web/xhtml-fragment.hxx | 52 -- web/xhtml.hxx | 358 ------------- web/xhtml/.gitignore | 1 + web/xhtml/buildfile | 10 + web/xhtml/fragment.cxx | 143 +++++ web/xhtml/fragment.hxx | 52 ++ web/xhtml/serialization.hxx | 358 +++++++++++++ web/xhtml/version.hxx.in | 11 + 106 files changed, 5843 insertions(+), 4549 deletions(-) create mode 100644 etc/systemd/brep-monitor.service create mode 100644 etc/systemd/brep-monitor.timer create mode 100644 mod/build-config.cxx create mode 100644 mod/build-config.hxx create mode 100644 mod/module.cli delete mode 100644 mod/options.cli create mode 100644 monitor/.gitignore create mode 100644 monitor/buildfile create mode 
100644 monitor/module.cli create mode 100644 monitor/monitor.cli create mode 100644 monitor/monitor.cxx delete mode 100644 web/.gitignore delete mode 100644 web/apache/log.hxx delete mode 100644 web/apache/request.cxx delete mode 100644 web/apache/request.hxx delete mode 100644 web/apache/request.ixx delete mode 100644 web/apache/service.cxx delete mode 100644 web/apache/service.hxx delete mode 100644 web/apache/service.txx delete mode 100644 web/apache/stream.hxx delete mode 100644 web/buildfile delete mode 100644 web/mime-url-encoding.cxx delete mode 100644 web/mime-url-encoding.hxx delete mode 100644 web/module.hxx create mode 100644 web/server/apache/log.hxx create mode 100644 web/server/apache/request.cxx create mode 100644 web/server/apache/request.hxx create mode 100644 web/server/apache/request.ixx create mode 100644 web/server/apache/service.cxx create mode 100644 web/server/apache/service.hxx create mode 100644 web/server/apache/service.txx create mode 100644 web/server/apache/stream.hxx create mode 100644 web/server/buildfile create mode 100644 web/server/mime-url-encoding.cxx create mode 100644 web/server/mime-url-encoding.hxx create mode 100644 web/server/module.hxx delete mode 100644 web/version.hxx.in delete mode 100644 web/xhtml-fragment.cxx delete mode 100644 web/xhtml-fragment.hxx delete mode 100644 web/xhtml.hxx create mode 100644 web/xhtml/.gitignore create mode 100644 web/xhtml/buildfile create mode 100644 web/xhtml/fragment.cxx create mode 100644 web/xhtml/fragment.hxx create mode 100644 web/xhtml/serialization.hxx create mode 100644 web/xhtml/version.hxx.in diff --git a/INSTALL b/INSTALL index 59cc081..00f7975 100644 --- a/INSTALL +++ b/INSTALL @@ -11,8 +11,9 @@ can be omitted. 1. Create 'brep' User This user will be used to run the brep package database loader, build database -cleaner, and the database schemes migration utility. We will also use its home -directory to build and install the brep module, store its configuration, etc. 
+cleaner, monitor, and database schemas migration utility. We will also use its +home directory to build and install the brep module, store its configuration, +etc. Note: if the deployment machine employs SELinux, then this approach may require additional configuration steps (not shown) in order to allow Apache2 @@ -194,7 +195,7 @@ CREATE EXTENSION citext; Exit psql (^D) -5. Create Database Schemes and Load Repositories +5. Create Database Schemas and Load Repositories $ mkdir config $ edit config/loadtab # Loader configuration, see brep-load(1). @@ -275,7 +276,7 @@ can also find this fragment in install/share/brep/etc/brep-apache2.conf): # brep-email admin@example.org - # Repository host. It specifies the scheme and the host address (but + # Repository host. It specifies the schema and the host address (but # not the root path; see brep-root below) that will be used whenever # brep needs to construct an absolute URL to one of its locations (for # example, a link to a build log that is being send via email). @@ -384,15 +385,15 @@ $ cd install/share/brep/www/ $ for i in *.scss; do sassc -s compressed $i `basename -s .scss $i`.css; done -8. Setup Periodic Loader and Cleaner Execution +8. Setup Periodic Loader, Cleaner, and Monitor Execution Initially this guide suggested using systemd user session support to run the -loader and the cleaner. However, the current state of user sessions has one -major drawback: they are not started/attached-to when logging in with su -l -(see Debian bug #813789 for details). This limitation makes them unusable in -our setup. If you still would like to use systemd to run the loader and the -cleaner, then you can set it up as a system-wide service which runs the -utilities as the brep user/group. Otherwise, a cron job is a natural choice. +loader, cleaner, and monitor. However, the current state of user sessions has +one major drawback: they are not started/attached-to when logging in with su +-l (see Debian bug #813789 for details). 
This limitation makes them unusable +in our setup. If you still would like to use systemd to run the utilities, +then you can set it up as a system-wide service which runs them as the brep +user/group. Otherwise, a cron job is a natural choice. Note that the builds cleaner execution is optional and is only required if the build2 build bot functionality is enabled (see the build bot documentation for @@ -402,29 +403,37 @@ parts in the subsequent subsections. If the CI request functionality is enabled you most likely will want to additionally setup the tenants cleanup. +The monitor execution is also optional and currently only makes sense if the +build2 build bot functionality is enabled. Note that you may need to replace +the public toolchain name argument in the monitor utility command with a real +list of toolchain names (and optionally versions) used in the brep build +infrastructure. -8.a Setup Periodic Loader and Cleaner Execution with cron -The following crontab entries will execute the loader every five minutes -and the tenants and builds cleaners once a day at midnight: +8.a Setup Periodic Loader, Cleaner, and Monitor Execution with cron + +The following crontab entries will execute the loader every five minutes, the +tenants and builds cleaners once a day at midnight, and the monitor every hour +(all shifted by a few minutes in order not to clash with other jobs): $ crontab -l MAILTO= PATH=/usr/local/bin:/bin:/usr/bin */5 * * * * $HOME/install/bin/brep-load $HOME/config/loadtab -0 0 * * * $HOME/install/bin/brep-clean tenants 240 -0 0 * * * $HOME/install/bin/brep-clean builds $HOME/config/buildtab +1 0 * * * $HOME/install/bin/brep-clean tenants 240 +2 0 * * * $HOME/install/bin/brep-clean builds $HOME/config/buildtab +3 * * * * $HOME/install/bin/brep-monitor --report-timeout 86400 --clean $HOME/config/brep-module.conf public ^D Note that here we assume that bpkg (which is executed by brep-load) is in one of the PATH's directories (usually /usr/local/bin). 
-8.b Setup Periodic Loader and Cleaner Execution with systemd +8.b Setup Periodic Loader, Cleaner, and Monitor Execution with systemd In this version we will use the systemd user session to periodically run the -loader and the cleaner as the brep user. If your installation doesn't use -systemd, then a cron job would be a natural alternative (see above). +loader, cleaner, and monitor as the brep user. If your installation doesn't +use systemd, then a cron job would be a natural alternative (see above). As the first step, make sure systemd user sessions support is working for the brep user: @@ -443,6 +452,7 @@ $ sudo loginctl enable-linger brep $ mkdir -p .config/systemd/user $ cp install/share/brep/etc/systemd/brep-load.* .config/systemd/user/ $ cp install/share/brep/etc/systemd/brep-clean.* .config/systemd/user/ +$ cp install/share/brep/etc/systemd/brep-monitor.* .config/systemd/user/ Start the service to make sure there are no issues: @@ -452,16 +462,21 @@ $ journalctl $ systemctl --user start brep-clean.service $ journalctl +$ systemctl --user start brep-monitor.service +$ journalctl + Start the timers and monitor them to make sure they fire: $ systemctl --user start brep-load.timer $ systemctl --user start brep-clean.timer +$ systemctl --user start brep-monitor.timer $ journalctl -f If everything looks good, enable the timer to be started at boot time: $ systemctl --user enable brep-load.timer $ systemctl --user enable brep-clean.timer +$ systemctl --user enable brep-monitor.timer 9. 
Upgrade Procedure @@ -483,18 +498,20 @@ $ cd brep $ bpkg fetch $ bpkg build brep -If you are using a systemd-based setup, then stop and disable the loader and -the cleaner: +If you are using a systemd-based setup, then stop and disable the loader, +cleaner, and monitor: $ systemctl --user disable --now brep-load.timer $ systemctl --user disable --now brep-clean.timer +$ systemctl --user disable --now brep-monitor.timer $ systemctl --user stop brep-load.service $ systemctl --user stop brep-clean.service +$ systemctl --user stop brep-monitor.service If you are using a cron-based setup, then it is not worth it commenting out the -job entries. If the new version of the loader or the cleaner gets executed -before or during the migration, then it will fail and you will get an email -with the diagnostics. Other than that, it should be harmless. +job entries. If the new version of the brep utilities gets executed before or +during the migration, then it will fail and you will get an email with the +diagnostics. Other than that, it should be harmless. 
Stop apache: @@ -510,7 +527,7 @@ Review brep-module.conf changes that may need to be merged: $ diff -u install/share/brep/etc/brep-module.conf config/brep-module.conf -Migrate database schemes: +Migrate database schemas: $ install/bin/brep-migrate package $ install/bin/brep-migrate build @@ -521,17 +538,20 @@ is not possible), then one way to do it would be: $ psql -d brep_package -c 'DROP OWNED BY brep' $ psql -d brep_build -c 'DROP OWNED BY brep' -If using systemd, then start and enable the loader and the cleaner: +If using systemd, then start and enable the loader, cleaner, and monitor: $ systemctl --user start brep-load.service $ systemctl --user status brep-load.service $ systemctl --user start brep-clean.service $ systemctl --user status brep-clean.service +$ systemctl --user start brep-monitor.service +$ systemctl --user status brep-monitor.service If everything looks good, enable periodic execution: $ systemctl --user enable --now brep-load.timer $ systemctl --user enable --now brep-clean.timer +$ systemctl --user enable --now brep-monitor.timer If using cron, then simply wait for the next run. diff --git a/INSTALL-DEV b/INSTALL-DEV index 101d9d7..af5c06e 100644 --- a/INSTALL-DEV +++ b/INSTALL-DEV @@ -113,7 +113,7 @@ CREATE EXTENSION citext; Exit psql (^D) -2. Create Database Schemes and Load the Repository +2. Create Database Schemas and Load the Repository All the commands are executed from brep project root. @@ -205,7 +205,7 @@ $ sudo tail -f /var/log/apache2/error.log 4. 
Reloading During Development -To do a "complete reload" (i.e., recreate database schemes, load the repository +To do a "complete reload" (i.e., recreate database schemas, load the repository data, and reload the Apache2 plugin), execute the following from brep/: $ migrate/brep-migrate --recreate package diff --git a/clean/clean.cli b/clean/clean.cli index 3c710fe..d3be4d6 100644 --- a/clean/clean.cli +++ b/clean/clean.cli @@ -127,8 +127,8 @@ Fatal error.| \li|\cb{2} -An instance of \cb{brep-clean} or \l{brep-migrate(1)} is already running. Try -again.| +An instance of \cb{brep-clean} or some other \cb{brep} utility is already +running. Try again.| \li|\cb{3} diff --git a/clean/clean.cxx b/clean/clean.cxx index d7a7731..5401ab1 100644 --- a/clean/clean.cxx +++ b/clean/clean.cxx @@ -111,8 +111,8 @@ namespace brep ops.db_port (), "options='-c default_transaction_isolation=serializable'"); - // Prevent several brep-clean/migrate instances from updating build - // database simultaneously. + // Prevent several brep utility instances from updating the database + // simultaneously. // database_lock l (db); @@ -316,6 +316,12 @@ namespace brep ? i->second : default_timeout); + // @@ Note that this approach doesn't consider the case when both + // the configuration and the package still exist but the package + // now excludes the configuration (configuration is now of the + // legacy class instead of the default class, etc). We should + // probably re-implement it the way brep-monitor does it. + // bool cleanup ( // Check that the build is not stale. 
// diff --git a/doc/buildfile b/doc/buildfile index 2595879..f0a9387 100644 --- a/doc/buildfile +++ b/doc/buildfile @@ -1,10 +1,11 @@ # file : doc/buildfile # license : MIT; see accompanying LICENSE file -cmds = \ -brep-clean \ -brep-load \ -brep-migrate +cmds = \ +brep-clean \ +brep-load \ +brep-migrate \ +brep-monitor ./: {man1 xhtml}{$cmds} \ css{common pre-box man} \ diff --git a/doc/cli.sh b/doc/cli.sh index b8c4c00..42303dc 100755 --- a/doc/cli.sh +++ b/doc/cli.sh @@ -71,7 +71,7 @@ o="--output-prefix brep-" # #compile "brep" $o --output-prefix "" -pages="clean/clean load/load migrate/migrate" +pages="clean/clean load/load migrate/migrate monitor/monitor" for p in $pages; do compile $p $o diff --git a/doc/manual.cli b/doc/manual.cli index 0b3b038..71a25a5 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -15,7 +15,8 @@ This document describes \c{brep}, the \c{build2} package repository web interface. For the command line interface of \c{brep} utilities refer to the -\l{brep-load(1)}, \l{brep-clean(1)}, and \l{brep-migrate(1)} man pages. +\l{brep-load(1)}, \l{brep-clean(1)}, \l{brep-migrate(1)}, and +\l{brep-monitor(1)} man pages. \h1#submit|Package Submission| diff --git a/etc/brep-module.conf b/etc/brep-module.conf index 458261e..12e96cd 100644 --- a/etc/brep-module.conf +++ b/etc/brep-module.conf @@ -3,6 +3,11 @@ # brep-). See brep(1) for detailed description of each configuration option. # Commented out options indicate their default values. # +# Besides being parsed by the brep module, this file may also be parsed by +# brep utilities that are normally only interested in a subset of the +# options. To simplify skipping of unrecognized options, this file must always have an +# option name and its value on the same line. +# # Package search page title. It is placed inside XHTML5 element. 
# diff --git a/etc/systemd/brep-clean.timer b/etc/systemd/brep-clean.timer index f4c587e..8e1e6e7 100644 --- a/etc/systemd/brep-clean.timer +++ b/etc/systemd/brep-clean.timer @@ -10,9 +10,9 @@ Unit=brep-clean.service # Persistent=false -# Wait 20 seconds until the first run. +# Wait 30 seconds until the first run. # -OnBootSec=20 +OnBootSec=30 # Then wait 5 minutes until the next run. # diff --git a/etc/systemd/brep-monitor.service b/etc/systemd/brep-monitor.service new file mode 100644 index 0000000..0a5c25e --- /dev/null +++ b/etc/systemd/brep-monitor.service @@ -0,0 +1,14 @@ +[Unit] +Description=brep infrastructure monitor service + +[Service] +Type=oneshot +#User=brep +#Group=brep + +# Replace the public toolchain name with a real list of toolchains. +# +ExecStart=/home/brep/install/bin/brep-monitor --report-timeout 86400 --clean /home/brep/config/brep-module.conf public + +[Install] +WantedBy=default.target diff --git a/etc/systemd/brep-monitor.timer b/etc/systemd/brep-monitor.timer new file mode 100644 index 0000000..f5f5a64 --- /dev/null +++ b/etc/systemd/brep-monitor.timer @@ -0,0 +1,23 @@ +[Unit] +Description=brep infrastructure monitor timer +RefuseManualStart=no +RefuseManualStop=no + +[Timer] +Unit=brep-monitor.service + +# Don't keep track of the timer across reboots. +# +Persistent=false + +# Wait 40 seconds until the first run. +# +OnBootSec=40 + +# Then wait 1 hour until the next run. +# +OnUnitInactiveSec=1h + + +[Install] +WantedBy=timers.target diff --git a/libbrep/build-package.hxx b/libbrep/build-package.hxx index 22a8151..702f937 100644 --- a/libbrep/build-package.hxx +++ b/libbrep/build-package.hxx @@ -118,6 +118,8 @@ namespace brep package_id id; upstream_version version; + bool archived; // True if the tenant the package belongs to is archived. + // Database mapping. 
// #pragma db member(version) set(this.version.init (this.id.version, (?))) diff --git a/libbrep/build.cxx b/libbrep/build.cxx index b6a07c7..db5bda2 100644 --- a/libbrep/build.cxx +++ b/libbrep/build.cxx @@ -80,4 +80,25 @@ namespace brep target (move (trg)) { } + + // build_delay + // + build_delay:: + build_delay (string tnt, + package_name_type pnm, version pvr, + string cfg, + string tnm, version tvr, + timestamp ptm) + : id (package_id (move (tnt), move (pnm), pvr), + move (cfg), + move (tnm), tvr), + tenant (id.package.tenant), + package_name (id.package.name), + package_version (move (pvr)), + configuration (id.configuration), + toolchain_name (id.toolchain_name), + toolchain_version (move (tvr)), + package_timestamp (ptm) + { + } } diff --git a/libbrep/build.hxx b/libbrep/build.hxx index 7e548a4..83b30a8 100644 --- a/libbrep/build.hxx +++ b/libbrep/build.hxx @@ -25,7 +25,7 @@ // #define LIBBREP_BUILD_SCHEMA_VERSION_BASE 9 -#pragma db model version(LIBBREP_BUILD_SCHEMA_VERSION_BASE, 9, closed) +#pragma db model version(LIBBREP_BUILD_SCHEMA_VERSION_BASE, 10, closed) // We have to keep these mappings at the global scope instead of inside // the brep namespace because they need to be also effective in the @@ -212,6 +212,16 @@ namespace brep // optional<result_status> status; + // Time of setting the result status that can be considered as the build + // task completion (currently all the result_status values). Initialized + // with timestamp_nonexistent by default. + // + // Note that in the future we may not consider abort and abnormal as the + // task completion and, for example, proceed with automatic rebuild (the + // flake monitor idea). + // + timestamp_type completion_timestamp; + // May be present only for the building state. // optional<string> agent_fingerprint; @@ -244,6 +254,10 @@ namespace brep // #pragma db member(timestamp) index + // @@ TMP remove when 0.13.0 is released. 
+ // + #pragma db member(completion_timestamp) default(0) + #pragma db member(results) id_column("") value_column("") \ section(results_section) @@ -259,9 +273,7 @@ namespace brep : tenant (id.package.tenant), package_name (id.package.name), configuration (id.configuration), - toolchain_name (id.toolchain_name) - { - } + toolchain_name (id.toolchain_name) {} }; // Note that ADL can't find the equal operator in join conditions, so we use @@ -340,6 +352,68 @@ namespace brep // #pragma db member(result) column("count(" + build::id.package.name + ")") }; + + // Used to track the package build delays since the last build or, if not + // present, since the first opportunity to build the package. + // + #pragma db object pointer(shared_ptr) session + class build_delay + { + public: + using package_name_type = brep::package_name; + + // If toolchain version is empty, then the object represents a minimum + // delay across all versions of the toolchain. + // + build_delay (string tenant, + package_name_type, version, + string configuration, + string toolchain_name, version toolchain_version, + timestamp package_timestamp); + + build_id id; + + string& tenant; // Tracks id.package.tenant. + package_name_type& package_name; // Tracks id.package.name. + upstream_version package_version; // Original of id.package.version. + string& configuration; // Tracks id.configuration. + string& toolchain_name; // Tracks id.toolchain_name. + upstream_version toolchain_version; // Original of id.toolchain_version. + + // Time of the latest delay report. Initialized with timestamp_nonexistent + // by default. + // + timestamp report_timestamp; + + // Time when the package is initially considered as buildable for this + // configuration and toolchain. It is used to track the build delay if the + // build object is absent (the first build task is not yet issued, the + // build is removed by brep-clean, etc). + // + timestamp package_timestamp; + + // Database mapping. 
+ // + #pragma db member(id) id column("") + + #pragma db member(tenant) transient + #pragma db member(package_name) transient + #pragma db member(package_version) \ + set(this.package_version.init (this.id.package.version, (?))) + #pragma db member(configuration) transient + #pragma db member(toolchain_name) transient + #pragma db member(toolchain_version) \ + set(this.toolchain_version.init (this.id.toolchain_version, (?))) + + private: + friend class odb::access; + + build_delay () + : tenant (id.package.tenant), + package_name (id.package.name), + configuration (id.configuration), + toolchain_name (id.toolchain_name) {} + }; } #endif // LIBBREP_BUILD_HXX diff --git a/libbrep/build.xml b/libbrep/build.xml index 3ade7c8..bf8920b 100644 --- a/libbrep/build.xml +++ b/libbrep/build.xml @@ -1,4 +1,44 @@ <changelog xmlns="http://www.codesynthesis.com/xmlns/odb/changelog" database="pgsql" schema-name="build" version="1"> + <changeset version="10"> + <alter-table name="build"> + <add-column name="completion_timestamp" type="BIGINT" null="false" default="0"/> + </alter-table> + <add-table name="build_delay" kind="object"> + <column name="package_tenant" type="TEXT" null="false"/> + <column name="package_name" type="CITEXT" null="false"/> + <column name="package_version_epoch" type="INTEGER" null="false"/> + <column name="package_version_canonical_upstream" type="TEXT" null="false"/> + <column name="package_version_canonical_release" type="TEXT" null="false" options="COLLATE "C""/> + <column name="package_version_revision" type="INTEGER" null="false"/> + <column name="configuration" type="TEXT" null="false"/> + <column name="toolchain_name" type="TEXT" null="false"/> + <column name="toolchain_version_epoch" type="INTEGER" null="false"/> + <column name="toolchain_version_canonical_upstream" type="TEXT" null="false"/> + <column name="toolchain_version_canonical_release" type="TEXT" null="false" options="COLLATE "C""/> + <column name="toolchain_version_revision" 
type="INTEGER" null="false"/> + <column name="package_version_upstream" type="TEXT" null="false"/> + <column name="package_version_release" type="TEXT" null="true"/> + <column name="toolchain_version_upstream" type="TEXT" null="false"/> + <column name="toolchain_version_release" type="TEXT" null="true"/> + <column name="report_timestamp" type="BIGINT" null="false"/> + <column name="package_timestamp" type="BIGINT" null="false"/> + <primary-key> + <column name="package_tenant"/> + <column name="package_name"/> + <column name="package_version_epoch"/> + <column name="package_version_canonical_upstream"/> + <column name="package_version_canonical_release"/> + <column name="package_version_revision"/> + <column name="configuration"/> + <column name="toolchain_name"/> + <column name="toolchain_version_epoch"/> + <column name="toolchain_version_canonical_upstream"/> + <column name="toolchain_version_canonical_release"/> + <column name="toolchain_version_revision"/> + </primary-key> + </add-table> + </changeset> + <model version="9"> <table name="build" kind="object"> <column name="package_tenant" type="TEXT" null="false"/> diff --git a/libbrep/common.hxx b/libbrep/common.hxx index 44028df..b7fc2da 100644 --- a/libbrep/common.hxx +++ b/libbrep/common.hxx @@ -8,6 +8,8 @@ #include <chrono> #include <type_traits> // static_assert +#include <odb/query.hxx> + #include <libbpkg/package-name.hxx> #include <libbrep/types.hxx> @@ -510,6 +512,34 @@ namespace brep compare_version_ne (x.version, y.version, true); } + // Allow comparing the query members with the query parameters bound by + // reference to variables of the package id type (in particular in the + // prepared queries). + // + // Note that it is not operator==() since the query template parameter type + // can not be deduced from the function parameter types and needs to be + // specified explicitly. 
+ // + template <typename T, typename ID> + inline auto + equal (const ID& x, const package_id& y) + -> decltype (x.tenant == odb::query<T>::_ref (y.tenant) && + x.name == odb::query<T>::_ref (y.name) && + x.version.epoch == odb::query<T>::_ref (y.version.epoch)) + { + using query = odb::query<T>; + + const auto& qv (x.version); + const canonical_version& v (y.version); + + return x.tenant == query::_ref (y.tenant) && + x.name == query::_ref (y.name) && + qv.epoch == query::_ref (v.epoch) && + qv.canonical_upstream == query::_ref (v.canonical_upstream) && + qv.canonical_release == query::_ref (v.canonical_release) && + qv.revision == query::_ref (v.revision); + } + // Repository id comparison operators. // inline bool diff --git a/load/load.cli b/load/load.cli index 1fc2e17..be19ebf 100644 --- a/load/load.cli +++ b/load/load.cli @@ -161,8 +161,8 @@ Fatal error.| \li|\cb{2} -An instance of \cb{brep-load} or \l{brep-migrate(1)} is already running. Try -again.| +An instance of \cb{brep-load} or some other \cb{brep} utility is already +running. Try again.| \li|\cb{3} diff --git a/load/load.cxx b/load/load.cxx index 5638553..bf8584c 100644 --- a/load/load.cxx +++ b/load/load.cxx @@ -1276,7 +1276,7 @@ try ops.db_port (), "options='-c default_transaction_isolation=serializable'"); - // Prevent several brep-load/migrate instances from updating DB + // Prevent several brep utility instances from updating the package database // simultaneously. // database_lock l (db); diff --git a/migrate/migrate.cli b/migrate/migrate.cli index ba9a572..177f991 100644 --- a/migrate/migrate.cli +++ b/migrate/migrate.cli @@ -125,8 +125,8 @@ Fatal error.| \li|\cb{2} -An instance of \cb{brep-migrate} or \l{brep-load(1)} is already running. Try -again.| +An instance of \cb{brep-migrate} or some other \cb{brep} utility is already +running. 
Try again.| \li|\cb{3} diff --git a/migrate/migrate.cxx b/migrate/migrate.cxx index 8b083fc..81c4543 100644 --- a/migrate/migrate.cxx +++ b/migrate/migrate.cxx @@ -300,12 +300,12 @@ try ops.db_port (), "options='-c default_transaction_isolation=serializable'"); - // Prevent several brep-migrate/load instances from updating DB + // Prevent several brep utility instances from updating the database // simultaneously. // database_lock l (db); - // Currently we don't support data migration for the manual database scheme + // Currently we don't support data migration for the manual database schema // migration. // if (db.schema_migration (db_schema)) diff --git a/mod/.gitignore b/mod/.gitignore index c6e608b..6b64ad0 100644 --- a/mod/.gitignore +++ b/mod/.gitignore @@ -1 +1 @@ -options.?xx +*-options.?xx diff --git a/mod/build-config-module.cxx b/mod/build-config-module.cxx index b1818b7..831cb78 100644 --- a/mod/build-config-module.cxx +++ b/mod/build-config-module.cxx @@ -9,10 +9,9 @@ #include <sstream> #include <libbutl/sha256.mxx> -#include <libbutl/utility.mxx> // throw_generic_error(), alpha(), etc. +#include <libbutl/utility.mxx> // throw_generic_error() #include <libbutl/openssl.mxx> #include <libbutl/filesystem.mxx> // dir_iterator, dir_entry -#include <libbutl/path-pattern.mxx> namespace brep { @@ -157,184 +156,6 @@ namespace brep build_conf_map_ = make_shared<conf_map_type> (move (conf_map)); } - // The default underlying class set expression (see below). - // - static const build_class_expr default_ucs_expr ( - {"default"}, '+', "Default."); - - bool build_config_module:: - exclude (const small_vector<build_class_expr, 1>& exprs, - const vector<build_constraint>& constrs, - const build_config& cfg, - string* reason) const - { - // Save the first sentence of the reason, lower-case the first letter if - // the beginning looks like a word (all subsequent characters until a - // whitespace are lower-case letters). 
- // - auto sanitize = [] (const string& reason) - { - string r (reason.substr (0, reason.find ('.'))); - - char c (r[0]); // Can be '\0'. - if (alpha (c) && c == ucase (c)) - { - bool word (true); - - for (size_t i (1); - i != r.size () && (c = r[i]) != ' ' && c != '\t' && c != '\n'; - ++i) - { - // Is not a word if contains a non-letter or an upper-case letter. - // - if (!alpha (c) || c == ucase (c)) - { - word = false; - break; - } - } - - if (word) - r[0] = lcase (r[0]); - } - - return r; - }; - - // First, match the configuration against the package underlying build - // class set and expressions. - // - bool m (false); - - // Match the configuration against an expression, updating the match - // result. - // - // We will use a comment of the first encountered excluding expression - // (changing the result from true to false) or non-including one (leaving - // the false result) as an exclusion reason. - // - auto match = [&cfg, &m, reason, &sanitize, this] - (const build_class_expr& e) - { - bool pm (m); - e.match (cfg.classes, build_conf_->class_inheritance_map, m); - - if (reason != nullptr) - { - // Reset the reason which, if saved, makes no sense anymore. - // - if (m) - { - reason->clear (); - } - else if (reason->empty () && - // - // Exclusion. - // - (pm || - // - // Non-inclusion. Make sure that the build class expression - // is empty or starts with an addition (+...). - // - e.expr.empty () || - e.expr.front ().operation == '+')) - { - *reason = sanitize (e.comment); - } - } - }; - - // Determine the underlying class set. Note that in the future we can - // potentially extend the underlying set with special classes. - // - const build_class_expr* ucs ( - !exprs.empty () && !exprs.front ().underlying_classes.empty () - ? 
&exprs.front () - : nullptr); - - // Note that the combined package build configuration class expression can - // be represented as the underlying class set used as a starting set for - // the original expressions and a restricting set, simultaneously. For - // example, for the expression: - // - // default legacy : -msvc - // - // the resulting expression will be: - // - // +( +default +legacy ) -msvc &( +default +legacy ) - // - // Let's, however, optimize it a bit based on the following facts: - // - // - If the underlying class set expression (+default +legacy in the above - // example) evaluates to false, then the resulting expression also - // evaluates to false due to the trailing '&' operation. Thus, we don't - // need to evaluate further if that's the case. - // - // - On the other hand, if the underlying class set expression evaluates - // to true, then we don't need to apply the trailing '&' operation as it - // cannot affect the result. - // - const build_class_expr& ucs_expr ( - ucs != nullptr - ? build_class_expr (ucs->underlying_classes, '+', ucs->comment) - : default_ucs_expr); - - match (ucs_expr); - - if (m) - { - for (const build_class_expr& e: exprs) - match (e); - } - - // Exclude the configuration if it doesn't match the compound expression. - // - if (!m) - return true; - - // Now check if the configuration is excluded/included via the patterns. - // - // To implement matching of absent name components with wildcard-only - // pattern components we are going to convert names to paths (see - // dash_components_to_path() for details). - // - // And if any of the build-{include,exclude} values (which is legal) or - // the build configuration name/target (illegal) are invalid paths, then - // we assume no match. 
- // - if (!constrs.empty ()) - try - { - path cn (dash_components_to_path (cfg.name)); - path tg (dash_components_to_path (cfg.target.string ())); - - for (const build_constraint& c: constrs) - { - if (path_match (cn, - dash_components_to_path (c.config), - dir_path () /* start */, - path_match_flags::match_absent) && - (!c.target || - path_match (tg, - dash_components_to_path (*c.target), - dir_path () /* start */, - path_match_flags::match_absent))) - { - if (!c.exclusion) - return false; - - if (reason != nullptr) - *reason = sanitize (c.comment); - - return true; - } - } - } - catch (const invalid_path&) {} - - return false; - } - bool build_config_module:: belongs (const bbot::build_config& cfg, const char* cls) const { @@ -360,59 +181,4 @@ namespace brep return false; } - - path build_config_module:: - dash_components_to_path (const string& pattern) - { - string r; - size_t nstar (0); - for (const path_pattern_term& pt: path_pattern_iterator (pattern)) - { - switch (pt.type) - { - case path_pattern_term_type::star: - { - // Replace ** with */**/* and skip all the remaining stars that may - // follow in this sequence. - // - if (nstar == 0) - r += "*"; - else if (nstar == 1) - r += "/**/*"; // The first star is already copied. - - break; - } - case path_pattern_term_type::literal: - { - // Replace '-' with '/' and fall through otherwise. - // - if (get_literal (pt) == '-') - { - r += '/'; - break; - } - } - // Fall through. - default: - { - r.append (pt.begin, pt.end); // Copy the pattern term as is. - } - } - - nstar = pt.star () ? nstar + 1 : 0; - } - - // Append the trailing slash to match the resulting paths as directories. - // This is required for the trailing /* we could append to match absent - // directory path components (see path_match_flags::match_absent for - // details). - // - // Note that valid dash components may not contain a trailing dash. - // Anyway, any extra trailing slashes will be ignored by the path - // constructor. 
- // - r += '/'; - - return path (move (r)); - } } diff --git a/mod/build-config-module.hxx b/mod/build-config-module.hxx index 4b23056..04fd5b1 100644 --- a/mod/build-config-module.hxx +++ b/mod/build-config-module.hxx @@ -5,7 +5,6 @@ #define MOD_BUILD_CONFIG_MODULE_HXX #include <map> -#include <algorithm> // find() #include <libbutl/utility.mxx> // compare_c_string @@ -16,8 +15,8 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/module.hxx> -#include <mod/options.hxx> +#include <mod/build-config.hxx> +#include <mod/module-options.hxx> // Base class for modules that utilize the build controller configuration. // @@ -39,17 +38,18 @@ namespace brep void init (const options::build&); - // Return true if the specified build configuration is excluded by a - // package based on its underlying build class set, build class - // expressions, and build constraints, potentially extending the - // underlying set with the special classes. Set the exclusion reason if - // requested. - // bool - exclude (const small_vector<bpkg::build_class_expr, 1>&, - const vector<bpkg::build_constraint>&, - const bbot::build_config&, - string* reason = nullptr) const; + exclude (const small_vector<bpkg::build_class_expr, 1>& exprs, + const vector<bpkg::build_constraint>& constrs, + const bbot::build_config& cfg, + string* reason = nullptr) const + { + return brep::exclude (exprs, + constrs, + cfg, + build_conf_->class_inheritance_map, + reason); + } // Check if the configuration belongs to the specified class. 
// @@ -62,20 +62,6 @@ namespace brep return belongs (cfg, cls.c_str ()); } - // Convert dash-separated components (target, build configuration name, - // machine name) or a pattern thereof into a path, replacing dashes with - // slashes (directory separators), `**` with `*/**/*`, and appending the - // trailing slash for a subsequent match using the path_match() - // functionality (the idea here is for `linux**` to match `linux-gcc` - // which is quite natural to expect). Throw invalid_path if the resulting - // path is invalid. - // - // Note that the match_absent path match flag must be used for the above - // `**` transformation to work. - // - static path - dash_components_to_path (const string&); - // Configuration/toolchain combination that, in particular, can be used as // a set value. // diff --git a/mod/build-config.cxx b/mod/build-config.cxx new file mode 100644 index 0000000..2d64aec --- /dev/null +++ b/mod/build-config.cxx @@ -0,0 +1,249 @@ +// file : mod/build-config.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/build-config.hxx> + +#include <libbutl/utility.mxx> // alpha(), etc. +#include <libbutl/path-pattern.mxx> + +namespace brep +{ + using namespace std; + using namespace butl; + using namespace bpkg; + using namespace bbot; + + // The default underlying class set expression (see below). + // + static const build_class_expr default_ucs_expr ( + {"default"}, '+', "Default."); + + bool + exclude (const small_vector<build_class_expr, 1>& exprs, + const vector<build_constraint>& constrs, + const build_config& cfg, + const map<string, string>& class_inheritance_map, + string* reason) + { + // Save the first sentence of the reason, lower-case the first letter if + // the beginning looks like a word (all subsequent characters until a + // whitespace are lower-case letters). + // + auto sanitize = [] (const string& reason) + { + string r (reason.substr (0, reason.find ('.'))); + + char c (r[0]); // Can be '\0'.
+ if (alpha (c) && c == ucase (c)) + { + bool word (true); + + for (size_t i (1); + i != r.size () && (c = r[i]) != ' ' && c != '\t' && c != '\n'; + ++i) + { + // Is not a word if contains a non-letter or an upper-case letter. + // + if (!alpha (c) || c == ucase (c)) + { + word = false; + break; + } + } + + if (word) + r[0] = lcase (r[0]); + } + + return r; + }; + + // First, match the configuration against the package underlying build + // class set and expressions. + // + bool m (false); + + // Match the configuration against an expression, updating the match + // result. + // + // We will use a comment of the first encountered excluding expression + // (changing the result from true to false) or non-including one (leaving + // the false result) as an exclusion reason. + // + auto match = [&cfg, &m, reason, &sanitize, &class_inheritance_map] + (const build_class_expr& e) + { + bool pm (m); + e.match (cfg.classes, class_inheritance_map, m); + + if (reason != nullptr) + { + // Reset the reason which, if saved, makes no sense anymore. + // + if (m) + { + reason->clear (); + } + else if (reason->empty () && + // + // Exclusion. + // + (pm || + // + // Non-inclusion. Make sure that the build class expression + // is empty or starts with an addition (+...). + // + e.expr.empty () || + e.expr.front ().operation == '+')) + { + *reason = sanitize (e.comment); + } + } + }; + + // Determine the underlying class set. Note that in the future we can + // potentially extend the underlying set with special classes. + // + const build_class_expr* ucs ( + !exprs.empty () && !exprs.front ().underlying_classes.empty () + ? &exprs.front () + : nullptr); + + // Note that the combined package build configuration class expression can + // be represented as the underlying class set used as a starting set for + // the original expressions and a restricting set, simultaneously. 
For + // example, for the expression: + // + // default legacy : -msvc + // + // the resulting expression will be: + // + // +( +default +legacy ) -msvc &( +default +legacy ) + // + // Let's, however, optimize it a bit based on the following facts: + // + // - If the underlying class set expression (+default +legacy in the above + // example) evaluates to false, then the resulting expression also + // evaluates to false due to the trailing '&' operation. Thus, we don't + // need to evaluate further if that's the case. + // + // - On the other hand, if the underlying class set expression evaluates + // to true, then we don't need to apply the trailing '&' operation as it + // cannot affect the result. + // + const build_class_expr& ucs_expr ( + ucs != nullptr + ? build_class_expr (ucs->underlying_classes, '+', ucs->comment) + : default_ucs_expr); + + match (ucs_expr); + + if (m) + { + for (const build_class_expr& e: exprs) + match (e); + } + + // Exclude the configuration if it doesn't match the compound expression. + // + if (!m) + return true; + + // Now check if the configuration is excluded/included via the patterns. + // + // To implement matching of absent name components with wildcard-only + // pattern components we are going to convert names to paths (see + // dash_components_to_path() for details). + // + // And if any of the build-{include,exclude} values (which is legal) or + // the build configuration name/target (illegal) are invalid paths, then + // we assume no match. 
+ // + if (!constrs.empty ()) + try + { + path cn (dash_components_to_path (cfg.name)); + path tg (dash_components_to_path (cfg.target.string ())); + + for (const build_constraint& c: constrs) + { + if (path_match (cn, + dash_components_to_path (c.config), + dir_path () /* start */, + path_match_flags::match_absent) && + (!c.target || + path_match (tg, + dash_components_to_path (*c.target), + dir_path () /* start */, + path_match_flags::match_absent))) + { + if (!c.exclusion) + return false; + + if (reason != nullptr) + *reason = sanitize (c.comment); + + return true; + } + } + } + catch (const invalid_path&) {} + + return false; + } + + path + dash_components_to_path (const string& pattern) + { + string r; + size_t nstar (0); + for (const path_pattern_term& pt: path_pattern_iterator (pattern)) + { + switch (pt.type) + { + case path_pattern_term_type::star: + { + // Replace ** with */**/* and skip all the remaining stars that may + // follow in this sequence. + // + if (nstar == 0) + r += "*"; + else if (nstar == 1) + r += "/**/*"; // The first star is already copied. + + break; + } + case path_pattern_term_type::literal: + { + // Replace '-' with '/' and fall through otherwise. + // + if (get_literal (pt) == '-') + { + r += '/'; + break; + } + } + // Fall through. + default: + { + r.append (pt.begin, pt.end); // Copy the pattern term as is. + } + } + + nstar = pt.star () ? nstar + 1 : 0; + } + + // Append the trailing slash to match the resulting paths as directories. + // This is required for the trailing /* we could append to match absent + // directory path components (see path_match_flags::match_absent for + // details). + // + // Note that valid dash components may not contain a trailing dash. + // Anyway, any extra trailing slashes will be ignored by the path + // constructor. 
+ // + r += '/'; + + return path (move (r)); + } +} diff --git a/mod/build-config.hxx b/mod/build-config.hxx new file mode 100644 index 0000000..d5e44ce --- /dev/null +++ b/mod/build-config.hxx @@ -0,0 +1,45 @@ +// file : mod/build-config.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_BUILD_CONFIG_HXX +#define MOD_BUILD_CONFIG_HXX + +#include <map> + +#include <libbpkg/manifest.hxx> + +#include <libbbot/build-config.hxx> + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +namespace brep +{ + // Return true if the specified build configuration is excluded by a package + // based on its underlying build class set, build class expressions, and + // build constraints, potentially extending the underlying set with the + // special classes. Set the exclusion reason if requested. + // + bool + exclude (const small_vector<bpkg::build_class_expr, 1>&, + const vector<bpkg::build_constraint>&, + const bbot::build_config&, + const std::map<string, string>& class_inheritance_map, + string* reason = nullptr); + + // Convert dash-separated components (target, build configuration name, + // machine name) or a pattern thereof into a path, replacing dashes with + // slashes (directory separators), `**` with `*/**/*`, and appending the + // trailing slash for a subsequent match using the path_match() + // functionality (the idea here is for `linux**` to match `linux-gcc` which + // is quite natural to expect). Throw invalid_path if the resulting path is + // invalid. + // + // Note that the match_absent path match flag must be used for the above + // `**` transformation to work. 
+ // + path + dash_components_to_path (const string&); +} + +#endif // MOD_BUILD_CONFIG_HXX diff --git a/mod/build.cxx b/mod/build.cxx index cdbaa60..5b9d8aa 100644 --- a/mod/build.cxx +++ b/mod/build.cxx @@ -3,7 +3,7 @@ #include <mod/build.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/server/mime-url-encoding.hxx> #include <mod/utility.hxx> diff --git a/mod/buildfile b/mod/buildfile index 9300faf..ca46bc4 100644 --- a/mod/buildfile +++ b/mod/buildfile @@ -19,25 +19,38 @@ import libs += libbpkg%lib{bpkg} import libs += libbbot%lib{bbot} include ../libbrep/ -include ../web/ -mod{brep}: {hxx ixx txx cxx}{* -options} \ - {hxx ixx cxx}{ options} \ - ../libbrep/lib{brep} ../web/libus{web} $libs +include ../web/xhtml/ +include ../web/server/ + +./: mod{brep} {libue libus}{mod} + +libu_src = options-types types-parsers build-config + +mod{brep}: {hxx ixx txx cxx}{* -module-options -{$libu_src}} \ + libus{mod} ../libbrep/lib{brep} ../web/server/libus{web-server} \ + $libs + +{libue libus}{mod}: {hxx ixx cxx}{module-options} \ + {hxx ixx txx cxx}{+{$libu_src} } \ + $libs + +libus{mod}: ../web/xhtml/libus{xhtml} +libue{mod}: ../web/xhtml/libue{xhtml} # Generated options parser. # if $cli.configured { - cli.cxx{options}: cli{options} + cli.cxx{module-options}: cli{module} # Set option prefix to the empty value to handle all unknown request # parameters uniformly with a single catch block.
# - cli.options += --std c++11 -I $src_root --include-with-brackets \ ---include-prefix mod --guard-prefix MOD --generate-specifier \ ---cxx-prologue "#include <mod/types-parsers.hxx>" \ ---cli-namespace brep::cli --generate-file-scanner --suppress-usage \ + cli.options += --std c++11 -I $src_root --include-with-brackets \ +--include-prefix mod --guard-prefix MOD --generate-specifier \ +--cxx-prologue "#include <mod/types-parsers.hxx>" \ +--cli-namespace brep::cli --generate-file-scanner --option-length 38 \ --generate-modifier --generate-description --option-prefix "" # Include the generated cli files into the distribution and don't remove diff --git a/mod/database-module.cxx b/mod/database-module.cxx index 5516730..f598bfd 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -5,8 +5,8 @@ #include <odb/exceptions.hxx> -#include <mod/options.hxx> #include <mod/database.hxx> +#include <mod/module-options.hxx> namespace brep { diff --git a/mod/database-module.hxx b/mod/database-module.hxx index a41752d..f72ba83 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -10,7 +10,7 @@ #include <libbrep/utility.hxx> #include <mod/module.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> namespace brep { diff --git a/mod/mod-build-configs.cxx b/mod/mod-build-configs.cxx index 8efc6c9..6731b28 100644 --- a/mod/mod-build-configs.cxx +++ b/mod/mod-build-configs.cxx @@ -7,11 +7,12 @@ #include <libstudxml/serializer.hxx> -#include <web/xhtml.hxx> -#include <web/module.hxx> +#include <web/server/module.hxx> + +#include <web/xhtml/serialization.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace bbot; diff --git a/mod/mod-build-configs.hxx b/mod/mod-build-configs.hxx index 333680a..562ac6d 100644 --- a/mod/mod-build-configs.hxx +++ b/mod/mod-build-configs.hxx @@ -8,7 +8,7 @@ #include <libbrep/utility.hxx> #include <mod/module.hxx> -#include 
<mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/build-config-module.hxx> namespace brep diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index 4dc71c8..bd172e3 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -8,12 +8,12 @@ #include <odb/database.hxx> #include <odb/transaction.hxx> -#include <web/module.hxx> +#include <web/server/module.hxx> #include <libbrep/build.hxx> #include <libbrep/build-odb.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace bbot; diff --git a/mod/mod-build-force.hxx b/mod/mod-build-force.hxx index 7b6b3b6..22df383 100644 --- a/mod/mod-build-force.hxx +++ b/mod/mod-build-force.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> #include <mod/build-config-module.hxx> diff --git a/mod/mod-build-log.cxx b/mod/mod-build-log.cxx index 16cc965..3032e52 100644 --- a/mod/mod-build-log.cxx +++ b/mod/mod-build-log.cxx @@ -10,12 +10,12 @@ #include <libbutl/timestamp.mxx> // to_stream() -#include <web/module.hxx> +#include <web/server/module.hxx> #include <libbrep/build.hxx> #include <libbrep/build-odb.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace bbot; diff --git a/mod/mod-build-log.hxx b/mod/mod-build-log.hxx index 9f9d1d9..a2f4e48 100644 --- a/mod/mod-build-log.hxx +++ b/mod/mod-build-log.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> #include <mod/build-config-module.hxx> diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index b3467d2..734ea5c 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -15,15 +15,15 @@ #include <libbbot/manifest.hxx> -#include <web/module.hxx> +#include 
<web/server/module.hxx> #include <libbrep/build.hxx> #include <libbrep/build-odb.hxx> #include <libbrep/package.hxx> #include <libbrep/package-odb.hxx> -#include <mod/build.hxx> // *_url() -#include <mod/options.hxx> +#include <mod/build.hxx> // *_url() +#include <mod/module-options.hxx> using namespace std; using namespace butl; @@ -409,6 +409,7 @@ handle (request& rq, response&) b->results = move (rqm.result.results); b->timestamp = system_clock::now (); + b->completion_timestamp = b->timestamp; build_db_->update (b); diff --git a/mod/mod-build-result.hxx b/mod/mod-build-result.hxx index b3911e1..71a60f9 100644 --- a/mod/mod-build-result.hxx +++ b/mod/mod-build-result.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> #include <mod/build-config-module.hxx> diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index c232815..17bc15e 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -22,14 +22,14 @@ #include <libbbot/manifest.hxx> #include <libbbot/build-config.hxx> -#include <web/module.hxx> +#include <web/server/module.hxx> #include <libbrep/build.hxx> #include <libbrep/build-odb.hxx> #include <libbrep/build-package.hxx> #include <libbrep/build-package-odb.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace butl; @@ -384,28 +384,18 @@ handle (request& rq, response& rs) using prep_bld_query = prepared_query<build>; package_id id; - const auto& qv (bld_query::id.package.version); bld_query bq ( - bld_query::id.package.tenant == bld_query::_ref (id.tenant) && - - bld_query::id.package.name == bld_query::_ref (id.name) && - - qv.epoch == bld_query::_ref (id.version.epoch) && - qv.canonical_upstream == - bld_query::_ref (id.version.canonical_upstream) && - qv.canonical_release == - bld_query::_ref (id.version.canonical_release) && - qv.revision == bld_query::_ref 
(id.version.revision) && + equal<build> (bld_query::id.package, id) && bld_query::id.configuration.in_range (cfg_names.begin (), - cfg_names.end ()) && + cfg_names.end ()) && - bld_query::id.toolchain_name == tqm.toolchain_name && + bld_query::id.toolchain_name == tqm.toolchain_name && compare_version_eq (bld_query::id.toolchain_version, canonical_version (toolchain_version), - true /* revision */) && + true /* revision */) && (bld_query::state == "built" || ((bld_query::force == "forcing" && diff --git a/mod/mod-build-task.hxx b/mod/mod-build-task.hxx index 5f4c14a..7875db1 100644 --- a/mod/mod-build-task.hxx +++ b/mod/mod-build-task.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> #include <mod/build-config-module.hxx> diff --git a/mod/mod-builds.cxx b/mod/mod-builds.cxx index 77ebc05..ab9e93e 100644 --- a/mod/mod-builds.cxx +++ b/mod/mod-builds.cxx @@ -16,9 +16,10 @@ #include <libbbot/manifest.hxx> // to_result_status(), to_string(result_status) -#include <web/xhtml.hxx> -#include <web/module.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/server/module.hxx> +#include <web/server/mime-url-encoding.hxx> + +#include <web/xhtml/serialization.hxx> #include <libbrep/build.hxx> #include <libbrep/build-odb.hxx> @@ -26,7 +27,7 @@ #include <libbrep/build-package-odb.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace butl; @@ -231,7 +232,10 @@ build_query (const brep::cstrings* configs, else { query sq (qb::status == rs); - result_status st (to_result_status(rs)); // May throw invalid_argument. + + // May throw invalid_argument. 
+ // + result_status st (to_result_status (rs)); if (st != result_status::success) { @@ -312,22 +316,6 @@ package_query (const brep::params::builds& params, return q; } -template <typename T, typename ID> -static inline query<T> -package_id_eq (const ID& x, const brep::package_id& y) -{ - using query = query<T>; - const auto& qv (x.version); - - return - x.tenant == query::_ref (y.tenant) && - x.name == query::_ref (y.name) && - qv.epoch == query::_ref (y.version.epoch) && - qv.canonical_upstream == query::_ref (y.version.canonical_upstream) && - qv.canonical_release == query::_ref (y.version.canonical_release) && - qv.revision == query::_ref (y.version.revision); -} - static const vector<pair<string, string>> build_results ({ {"unbuilt", "<unbuilt>"}, {"*", "*"}, @@ -821,9 +809,8 @@ handle (request& rq, response& rs) const auto& bid (bld_query::build::id); - bld_query bq ( - package_id_eq<package_build_count> (bid.package, id) && - bid.configuration == bld_query::_ref (config) && + bld_query bq (equal<package_build_count> (bid.package, id) && + bid.configuration == bld_query::_ref (config) && // Note that the query already constrains configurations via the // configuration name and the tenant via the build package id. @@ -936,7 +923,7 @@ handle (request& rq, response& rs) package_id id; bld_query bq ( - package_id_eq<package_build> (bld_query::build::id.package, id) && + equal<package_build> (bld_query::build::id.package, id) && // Note that the query already constrains the tenant via the build // package id. 
diff --git a/mod/mod-builds.hxx b/mod/mod-builds.hxx index 714b374..0aa7916 100644 --- a/mod/mod-builds.hxx +++ b/mod/mod-builds.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> #include <mod/build-config-module.hxx> diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx index 77377eb..d2da93f 100644 --- a/mod/mod-ci.cxx +++ b/mod/mod-ci.cxx @@ -17,11 +17,12 @@ #include <libbpkg/manifest.hxx> #include <libbpkg/package-name.hxx> -#include <web/xhtml.hxx> -#include <web/module.hxx> +#include <web/server/module.hxx> + +#include <web/xhtml/serialization.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/external-handler.hxx> using namespace std; @@ -116,8 +117,8 @@ handle (request& rq, response& rs) // latter case we will always respond with the same neutral message for // security reason, logging the error details. Note that descriptions of // exceptions caught by the web server are returned to the client (see - // web/module.hxx for details), and we want to avoid this when there is a - // danger of exposing sensitive data. + // web/server/module.hxx for details), and we want to avoid this when there + // is a danger of exposing sensitive data. 
// // Also we will pass through exceptions thrown by the underlying API, unless // we need to handle them or add details for the description, in which case diff --git a/mod/mod-ci.hxx b/mod/mod-ci.hxx index 1228714..431f53b 100644 --- a/mod/mod-ci.hxx +++ b/mod/mod-ci.hxx @@ -4,13 +4,13 @@ #ifndef MOD_MOD_CI_HXX #define MOD_MOD_CI_HXX -#include <web/xhtml-fragment.hxx> +#include <web/xhtml/fragment.hxx> #include <libbrep/types.hxx> #include <libbrep/utility.hxx> #include <mod/module.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> namespace brep { diff --git a/mod/mod-package-details.cxx b/mod/mod-package-details.cxx index c7973d3..e0bd1ef 100644 --- a/mod/mod-package-details.cxx +++ b/mod/mod-package-details.cxx @@ -9,15 +9,16 @@ #include <odb/database.hxx> #include <odb/transaction.hxx> -#include <web/xhtml.hxx> -#include <web/module.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/server/module.hxx> +#include <web/server/mime-url-encoding.hxx> + +#include <web/xhtml/serialization.hxx> #include <libbrep/package.hxx> #include <libbrep/package-odb.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace odb::core; using namespace brep::cli; diff --git a/mod/mod-package-details.hxx b/mod/mod-package-details.hxx index 16f8c3e..e1b0a9c 100644 --- a/mod/mod-package-details.hxx +++ b/mod/mod-package-details.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> namespace brep diff --git a/mod/mod-package-version-details.cxx b/mod/mod-package-version-details.cxx index cde65b0..bfc08b0 100644 --- a/mod/mod-package-version-details.cxx +++ b/mod/mod-package-version-details.cxx @@ -9,9 +9,10 @@ #include <odb/database.hxx> #include <odb/transaction.hxx> -#include <web/xhtml.hxx> -#include <web/module.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/server/module.hxx> 
+#include <web/server/mime-url-encoding.hxx> + +#include <web/xhtml/serialization.hxx> #include <libbrep/build.hxx> #include <libbrep/build-odb.hxx> @@ -19,7 +20,7 @@ #include <libbrep/package-odb.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace butl; diff --git a/mod/mod-package-version-details.hxx b/mod/mod-package-version-details.hxx index 8d0d373..a88d6c2 100644 --- a/mod/mod-package-version-details.hxx +++ b/mod/mod-package-version-details.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> #include <mod/build-config-module.hxx> diff --git a/mod/mod-packages.cxx b/mod/mod-packages.cxx index 81cf83c..65c7c5b 100644 --- a/mod/mod-packages.cxx +++ b/mod/mod-packages.cxx @@ -10,15 +10,16 @@ #include <odb/transaction.hxx> #include <odb/schema-catalog.hxx> -#include <web/xhtml.hxx> -#include <web/module.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/server/module.hxx> +#include <web/server/mime-url-encoding.hxx> + +#include <web/xhtml/serialization.hxx> #include <libbrep/package.hxx> #include <libbrep/package-odb.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace odb::core; using namespace brep::cli; diff --git a/mod/mod-packages.hxx b/mod/mod-packages.hxx index d1c4677..611d63c 100644 --- a/mod/mod-packages.hxx +++ b/mod/mod-packages.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> namespace brep diff --git a/mod/mod-repository-details.cxx b/mod/mod-repository-details.cxx index 988c445..813b738 100644 --- a/mod/mod-repository-details.cxx +++ b/mod/mod-repository-details.cxx @@ -12,15 +12,16 @@ #include <libbutl/timestamp.mxx> // to_string() -#include <web/xhtml.hxx> 
-#include <web/module.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/server/module.hxx> +#include <web/server/mime-url-encoding.hxx> + +#include <web/xhtml/serialization.hxx> #include <libbrep/package.hxx> #include <libbrep/package-odb.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace odb::core; diff --git a/mod/mod-repository-details.hxx b/mod/mod-repository-details.hxx index bd4b3ba..e83831d 100644 --- a/mod/mod-repository-details.hxx +++ b/mod/mod-repository-details.hxx @@ -7,7 +7,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/database-module.hxx> namespace brep diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx index b6c54b8..02d6c93 100644 --- a/mod/mod-repository-root.cxx +++ b/mod/mod-repository-root.cxx @@ -10,10 +10,10 @@ #include <sstream> #include <algorithm> // find() -#include <web/module.hxx> +#include <web/server/module.hxx> #include <mod/module.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include <mod/mod-ci.hxx> #include <mod/mod-submit.hxx> diff --git a/mod/mod-repository-root.hxx b/mod/mod-repository-root.hxx index ac4b254..9e28797 100644 --- a/mod/mod-repository-root.hxx +++ b/mod/mod-repository-root.hxx @@ -8,7 +8,7 @@ #include <libbrep/utility.hxx> #include <mod/module.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> namespace brep { diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx index 0dea2b7..9c93a36 100644 --- a/mod/mod-submit.cxx +++ b/mod/mod-submit.cxx @@ -14,11 +14,12 @@ #include <libbutl/manifest-types.mxx> #include <libbutl/manifest-serializer.mxx> -#include <web/xhtml.hxx> -#include <web/module.hxx> +#include <web/server/module.hxx> + +#include <web/xhtml/serialization.hxx> #include <mod/page.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> #include 
<mod/external-handler.hxx> using namespace std; diff --git a/mod/mod-submit.hxx b/mod/mod-submit.hxx index 96a60f9..fc5f8d4 100644 --- a/mod/mod-submit.hxx +++ b/mod/mod-submit.hxx @@ -4,13 +4,13 @@ #ifndef MOD_MOD_SUBMIT_HXX #define MOD_MOD_SUBMIT_HXX -#include <web/xhtml-fragment.hxx> +#include <web/xhtml/fragment.hxx> #include <libbrep/types.hxx> #include <libbrep/utility.hxx> #include <mod/module.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> namespace brep { diff --git a/mod/module.cli b/mod/module.cli new file mode 100644 index 0000000..fa1d2cc --- /dev/null +++ b/mod/module.cli @@ -0,0 +1,811 @@ +// file : mod/module.cli -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +include <libbpkg/manifest.hxx>; // repository_location + +include <web/xhtml/fragment.hxx>; + +include <libbrep/types.hxx>; + +include <mod/options-types.hxx>; + +namespace brep +{ + // Web handler configuration options. + // + namespace options + { + // Option groups. + // + class handler + { + string email + { + "<email>", + "Repository email. This email is used for the \cb{From:} header in + emails sent by \cb{brep} (for example, build failure notifications)." + } + + string host + { + "<host>", + "Repository host. It specifies the scheme and the host address (but + not the root path; see \cb{root} below) that will be used whenever + \cb{brep} needs to construct an absolute URL to one of its locations + (for example, a link to a build log that is being sent via email)." + } + + dir_path root = "/" + { + "<path>", + "Repository root. That is, this is the part of the URL between the + host name and the start of the repository. For example, root value + '\cb{/pkg}' means the repository URL is \cb{http://example.org/pkg/}. + Specify '\cb{/}' to use the web server root + (\cb{http://example.org/})." + } + + string tenant-name = "tenant" + { + "<name>", + "Name to call the tenant values on web pages. If not specified, then + \cb{tenant} is used."
+ } + + uint16_t verbosity = 0 + { + "<level>", + "Trace verbosity level. Level 0 disables tracing, which is also the + default." + } + }; + + class openssl_options + { + path openssl = "openssl" + { + "<path>", + "The openssl program to be used for crypto operations. You can also + specify additional options that should be passed to the openssl + program with \cb{openssl-option}. If the openssl program is not + explicitly specified, then \cb{brep} will use \cb{openssl} by + default." + } + + strings openssl-option + { + "<opt>", + "Additional option to be passed to the openssl program (see + \cb{openssl} for details). Repeat this option to specify multiple + openssl options." + } + + strings openssl-envvar + { + "<name>[=value]", + "Environment variable to be set (<name>=<value>) or unset (just + <name>) for the openssl program (see \cb{openssl} for details). + Repeat this option to specify multiple openssl variables. Note + that unspecified variables are inherited from the web server + process. + + You need to at least set the \cb{RANDFILE} environment variable + to change the default location of the openssl program seed file + and maybe also the \cb{OPENSSL_CONF} variable if you would like + to use a custom openssl configuration file." + } + }; + + class package_db + { + string package-db-user + { + "<user>", + "Package database login user name. If not specified, then operating + system (login) name is used. See also \c{package-db-role}." + } + + string package-db-role = "brep" + { + "<user>", + "Package database execution user name. If not empty then the login + user will be switched (with \c{SET ROLE}) to this user prior to + executing any statements. If not specified, then \cb{brep} is used." + } + + string package-db-password + { + "<pass>", + "Package database password. If not specified, then login without + password is expected to work." + } + + string package-db-name = "brep_package" + { + "<name>", + "Package database name. 
If not specified, then \cb{brep_package} is + used by default." + } + + string package-db-host + { + "<host>", + "Package database host name, address, or socket. If not specified, then + connect to \cb{localhost} using the operating system-default + mechanism (Unix-domain socket, etc)." + } + + uint16_t package-db-port = 0 + { + "<port>", + "Package database port number. If not specified, the default port is + used." + } + + size_t package-db-max-connections = 5 + { + "<num>", + "The maximum number of concurrent package database connections per web + server process. If 0, then no limitation is applied. The default is + 5." + } + + size_t package-db-retry = 10 + { + "<num>", + "The maximum number of times to retry package database transactions in + the face of recoverable failures (deadlock, loss of connection, etc). + The default is 10." + } + }; + + class build: openssl_options + { + path build-config + { + "<buildtab>", + "Build configuration file. If not specified, then the package building + functionality will be disabled. If specified, then the build database + must be configured (see \cb{build-db-*}). The \cb{brep} instance + needs to be restarted after modifying <buildtab> for the changes to + take effect." + } + + dir_path build-bot-agent-keys + { + "<dir>", + "Directory containing build bot agent public keys. If specified, then + \cb{brep} will perform agent authentication and will reject build + results from unauthenticated ones. If not specified, then build + results are accepted from all agents (which will be a security + risk if the \cb{brep} instance is publicly accessible). + + The directory is expected to contain one PEM-encoded public key + per file with the \cb{.pem} extension. All other files and + subdirectories are ignored. The \cb{brep} instance needs to be + restarted after adding new key files for the changes to take effect." 
+ } + + size_t build-forced-rebuild-timeout = 600 + { + "<seconds>", + "Time to wait before considering a package for a forced rebuild. Must + be specified in seconds. Default is 10 minutes." + } + + size_t build-normal-rebuild-timeout = 86400 + { + "<seconds>", + "Time to wait before considering a package for a normal rebuild. Must + be specified in seconds. Default is 24 hours." + } + }; + + class build_db + { + string build-db-user + { + "<user>", + "Build database login user name. If not specified, then operating + system (login) name is used. See also \c{build-db-role}." + } + + string build-db-role = "brep" + { + "<user>", + "Build database execution user name. If not empty then the login + user will be switched (with \c{SET ROLE}) to this user prior to + executing any statements. If not specified, then \cb{brep} is used." + } + + string build-db-password + { + "<pass>", + "Build database password. If not specified, then login without + password is expected to work." + } + + string build-db-name = "brep_build" + { + "<name>", + "Build database name. If not specified, then \cb{brep_build} is used + by default." + } + + string build-db-host + { + "<host>", + "Build database host name, address, or socket. If not specified, then + connect to \cb{localhost} using the operating system-default + mechanism (Unix-domain socket, etc)." + } + + uint16_t build-db-port = 0 + { + "<port>", + "Build database port number. If not specified, the default port is + used." + } + + size_t build-db-max-connections = 5 + { + "<num>", + "The maximum number of concurrent build database connections per web + server process. If 0, then no limitation is applied. The default is + 5." + } + + size_t build-db-retry = 10 + { + "<num>", + "The maximum number of times to retry build database transactions in + the face of recoverable failures (deadlock, loss of connection, etc). + The default is 10." + } + }; + + class page + { + web::xhtml::fragment logo + { + "<xhtml>", + "Web page logo. 
It is displayed in the page header aligned to the left + edge. The value is treated as an XHTML5 fragment." + } + + vector<page_menu> menu; + { + "<label=link>", + "Web page menu. Each entry is displayed in the page header in the + order specified and aligned to the right edge. A link target that + starts with '\cb{/}' or contains '\cb{:}' is used as is. Otherwise, + it is prefixed with the repository web interface root." + } + }; + + class search + { + uint16_t search-page-entries = 20 + { + "<num>", + "Number of packages per page. The default is 20." + } + + uint16_t search-pages = 5 + { + "<num>", + "Number of pages in navigation (pager). The default is 5." + } + }; + + class package + { + uint16_t package-description = 500 + { + "<len>", + "Number of package description characters to display in brief pages. + The default is 500 (~ 80 characters * 6 lines)." + } + + uint16_t package-changes = 5000; + { + "<len>", + "Number of package changes characters to display in brief pages. The + default is 5000 (~ 80 chars x 60 lines)." + } + }; + + // Handler options. + // + + class packages: search, package_db, page, handler + { + string search-title = "Packages" + { + "<text>", + "Package search page title. It is placed inside XHTML5 <title> + element." + } + }; + + class package_details: package, search, package_db, page, handler + { + }; + + class package_version_details: package, package_db, + build, build_db, + page, + handler + { + }; + + class repository_details: package_db, page, handler + { + }; + + class build_task: build, build_db, handler + { + size_t build-task-request-max-size = 102400 + { + "<bytes>", + "The maximum size of the build task request manifest accepted. Note + that the HTTP POST request body is cached to retry database + transactions in the face of recoverable failures (deadlock, loss of + connection, etc). The default is 100K." 
+ } + + size_t build-result-timeout = 10800 + { + "<seconds>", + "Time to wait before considering the expected task result lost. Must be + specified in seconds. The default is 3 hours." + } + }; + + class build_result: build, package_db, build_db, handler + { + size_t build-result-request-max-size = 10240000 + { + "<bytes>", + "The maximum size of the build result manifest accepted. Note that the + HTTP POST request body is cached to retry database transactions in the + face of recoverable failures (deadlock, loss of connection, etc). The + default is 10M." + } + }; + + class build_log: build, build_db, handler + { + }; + + class build_force: build, build_db, handler + { + }; + + class builds: build, build_db, page, handler + { + uint16_t build-page-entries = 20 + { + "<num>", + "Number of builds per page. The default is 20." + } + + uint16_t build-pages = 5 + { + "<num>", + "Number of pages in navigation (pager). The default is 5." + } + }; + + class build_configs: build, page, handler + { + uint16_t build-config-page-entries = 20 + { + "<num>", + "Number of build configurations per page. The default is 20." + } + + uint16_t build-config-pages = 5 + { + "<num>", + "Number of pages in navigation (pager). The default is 5." + } + }; + + class submit: page, handler + { + dir_path submit-data + { + "<dir>", + "The directory to save final submission data to. If unspecified, the + package submission functionality will be disabled. If specified, + then \cb{submit-temp} must be specified as well. See \l{brep The + \cb{build2} Repository Interface Manual} for more information on + package submission. + + Note that the directory path must be absolute and the directory + itself must exist and have read, write, and execute permissions + granted to the user that runs the web server." + } + + dir_path submit-temp + { + "<dir>", + "The directory to save temporary submission data to. Must be specified + if the package submission functionality is enabled. 
+ + Note that this directory must be on the same filesystem and satisfy + the same requirements as \cb{submit-data}. It is also the user's + responsibility to clean it up after an unclean web server shutdown." + } + + size_t submit-max-size = 10485760 + { + "<bytes>", + "The maximum size of the submission data accepted. Note that currently + the entire submission request is read into memory. The default is + 10M." + } + + path submit-form + { + "<file>", + "The package submission form fragment. If specified, then its contents + are treated as an XHTML5 fragment that is inserted into the <body> + element of the submission page. If unspecified, then no submission + page will be displayed. Note that the file path must be absolute." + } + + string submit-email + { + "<email>", + "The package submission email. If specified, the submission request + and result manifests will be sent to this address. See \l{brep The + \cb{build2} Repository Interface Manual} for more information." + } + + path submit-handler + { + "<path>", + "The handler program to be executed on package submission. The handler + is executed as part of the HTTP request and is passed additional + arguments that can be specified with \cb{submit-handler-argument} + followed by the absolute path to the submission directory. See + \l{brep The \cb{build2} Repository Interface Manual} for more + information. Note that the program path must be absolute." + } + + strings submit-handler-argument + { + "<arg>", + "Additional arguments to be passed to the submission handler program + (see \cb{submit-handler} for details). Repeat this option to specify + multiple arguments." + } + + size_t submit-handler-timeout + { + "<seconds>", + "The submission handler program timeout in seconds. If specified and + the handler does not exit in the allotted time, then it is killed and + its termination is treated as abnormal." 
+ } + }; + + class ci: page, handler + { + dir_path ci-data + { + "<dir>", + "The directory to save CI request data to. If unspecified, the + package CI functionality will be disabled. See \l{brep The + \cb{build2} Repository Interface Manual} for more information on + package CI. + + Note that the directory path must be absolute and the directory + itself must exist and have read, write, and execute permissions + granted to the user that runs the web server." + } + + path ci-form + { + "<file>", + "The package CI form fragment. If specified, then its contents are + treated as an XHTML5 fragment that is inserted into the <body> + element of the CI page. If unspecified, then no CI page will be + displayed. Note that the file path must be absolute." + } + + string ci-email + { + "<email>", + "The package CI email. If specified, the CI request and result + manifests will be sent to this address. See \l{brep The \cb{build2} + Repository Interface Manual} for more information." + } + + path ci-handler + { + "<path>", + "The handler program to be executed on CI request. The handler is + executed as part of the HTTP request and is passed additional + arguments that can be specified with \cb{ci-handler-argument} + followed by the absolute path to the CI request directory. See + \l{brep The \cb{build2} Repository Interface Manual} for more + information. Note that the program path must be absolute." + } + + strings ci-handler-argument + { + "<arg>", + "Additional arguments to be passed to the CI handler program (see + \cb{ci-handler} for details). Repeat this option to specify multiple + arguments." + } + + size_t ci-handler-timeout + { + "<seconds>", + "The CI handler program timeout in seconds. If specified and the + handler does not exit in the allotted time, then it is killed and + its termination is treated as abnormal." 
+ } + }; + + class repository_root: handler + { + string root-global-view = "packages" + { + "<service>", + "The default view to display for the global repository root. The + <service> argument is one of the supported services (\c{packages}, + \c{builds}, \c{submit}, \c{ci}, etc). The default service is + packages." + } + + string root-tenant-view = "packages" + { + "<service>", + "The default view to display for the tenant repository root. The + <service> argument is one of the supported services (\c{packages}, + \c{builds}, \c{submit}, \c{ci}, etc). The default service is + packages." + } + }; + } + + // Web handler HTTP request parameters. + // + namespace params + { + // Use parameters long names in the C++ code, short aliases (if present) + // in HTTP URL. + // + class packages + { + // Display package search result list starting from this page. + // + uint16_t page | p; + + // Package search criteria. + // + // Note that the packages parameter is renamed to '_' by the root + // handler (see the request_proxy class for details). + // + string q | _; + }; + + class package_details + { + // Display package version search result list starting from this page. + // + uint16_t page | p; + + // Package version search criteria. + // + string query | q; + + // Page form. + // + page_form form | f = page_form::brief; + }; + + class package_version_details + { + // Page form. + // + page_form form | f = page_form::brief; + }; + + class repository_details + { + // No parameters so far. + // + }; + + class build_task + { + // Package repository canonical name (note: including pkg: type). + // + vector<string> repository | r; + }; + + class build_result + { + // No parameters so far. + // + }; + + class build_log + { + // No parameters so far. + // + }; + + // All parameters are non-optional. + // + class build_force + { + // Package name. + // + string package | pn; + + // Package version. 
May not be url-encoded, in which case the plus + // character is considered literally (rather than as the encoded space + // character). In other words, after url-decoding the space character is + // treated the same way as the plus character. + // + // @@ Make it of the version type? Maybe after it gets moved to + // libbpkg/types.hxx or at least a second use case appears. + // + string version | pv; + + // Package build configuration. + // + string configuration | cf; + + // Toolchain name. + // + string toolchain_name | tn; + + // Toolchain version. May not be url-encoded (see above). + // + string toolchain_version | tv; + + // Package rebuild reason. Must not be empty. + // + string reason; + }; + + class builds + { + // Display packages build configurations list starting from this page. + // + uint16_t page | p; + + // Package builds query filter options. + // + + // Package name wildcard. An empty value is treated the same way as *. + // + // We used to generate URLs like: + // + // https://cppget.org/?builds&pn=bbot + // + // This looked a bit verbose, so now we produce URLs like: + // + // https://cppget.org/?builds=bbot + // + // To support the already distributed URLs the name_legacy (pn) parameter + // overrides the name (builds) parameter, if present. Note that the + // builds parameter is renamed to '_' by the root handler (see the + // request_proxy class for details). + // + string name | _; + string name_legacy | pn; + + // Package version. If empty or *, then no version constraint is applied. + // Otherwise the build package version must match the value exactly. + // + string version | pv; + + // Package build toolchain in the <name>-<version> form. If *, then no + // toolchain constraint is applied. Otherwise the build toolchain name + // and version must match the value exactly. + // + string toolchain | tc = "*"; + + // Package build configuration name wildcard. An empty value is treated + // the same way as *.
+ // + string configuration | cf; + + // Package build machine name wildcard. An empty value is treated the + // same way as *. + // + string machine | mn; + + // Package build target wildcard. An empty value is treated the same way + // as *. + // + string target | tg; + + // Package build result. If *, then no build result constraint is + // applied. Otherwise the value is supposed to be one of the + // following (ordered) statuses: pending, building, success, warning, + // error, abort, abnormal. The first 3 statuses are checked for equality, + // the rest - for being greater or equal. + // + string result | rs = "*"; + }; + + class build_configs + { + // Note that the build-configs parameter is renamed to '_' by the root + // handler (see the request_proxy class for details). + // + string class_name | _ = "all"; + + // Display build configurations list starting from this page. + // + uint16_t page | p; + }; + + // Parameters, except simulate, must either be all present (actual + // submission) or absent (submission form request). + // + // Note also that besides these parameters there can be others. We don't + // recognize their semantics and just save them to the submission request + // manifest. + // + class submit + { + // Package archive file name. Must be <input type="file"/>. + // + // Note that it can potentially be not just a name but a file path and + // in the client's form (e.g., Windows). + // + string archive; + + // Package archive file SHA256 checksum. + // + string sha256sum; + + // Submission simulation outcome. + // + string simulate; + }; + + // Parameters, except simulate, must either be all present (actual CI + // request) or absent (CI form request). + // + // Note also that besides these parameters there can be others. We don't + // recognize their semantics and just save them to the CI request + // manifest. + // + class ci + { + // Package repository location.
+ // + // Note that the ci parameter is renamed to '_' by the root handler (see + // the request_proxy class for details). + // + bpkg::repository_location repository | _; + + // Package names/versions. + // + strings package; + + // Overrides file name. Must be <input type="file"/>. + // + // Note that we don't really need this name and only check if this + // parameter is specified to detect presence of the upload. + // + string overrides; + + // Submission simulation outcome. + // + string simulate; + }; + } +} diff --git a/mod/module.cxx b/mod/module.cxx index 8f306fd..06799d7 100644 --- a/mod/module.cxx +++ b/mod/module.cxx @@ -10,10 +10,10 @@ #include <cstring> // strchr() #include <functional> // bind() -#include <web/module.hxx> -#include <web/apache/log.hxx> +#include <web/server/module.hxx> +#include <web/server/apache/log.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace placeholders; // For std::bind's _1, etc. diff --git a/mod/module.hxx b/mod/module.hxx index 2c62166..b3ed67b 100644 --- a/mod/module.hxx +++ b/mod/module.hxx @@ -4,14 +4,14 @@ #ifndef MOD_MODULE_HXX #define MOD_MODULE_HXX -#include <web/module.hxx> +#include <web/server/module.hxx> #include <libbrep/types.hxx> #include <libbrep/utility.hxx> #include <mod/utility.hxx> -#include <mod/options.hxx> #include <mod/diagnostics.hxx> +#include <mod/module-options.hxx> namespace brep { diff --git a/mod/options.cli b/mod/options.cli deleted file mode 100644 index f02d7a6..0000000 --- a/mod/options.cli +++ /dev/null @@ -1,811 +0,0 @@ -// file : mod/options.cli -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -include <libbpkg/manifest.hxx>; // repository_location - -include <web/xhtml-fragment.hxx>; - -include <libbrep/types.hxx>; - -include <mod/options-types.hxx>; - -namespace brep -{ - // Web handler configuration options. - // - namespace options - { - // Option groups. 
- // - class handler - { - string email - { - "<email>", - "Repository email. This email is used for the \cb{From:} header in - emails send by \cb{brep} (for example, build failure notifications)." - } - - string host - { - "<host>", - "Repository host. It specifies the scheme and the host address (but - not the root path; see \cb{root} below) that will be used whenever - \cb{brep} needs to construct an absolute URL to one of its locations - (for example, a link to a build log that is being send via email)." - } - - dir_path root = "/" - { - "<path>" - "Repository root. That is, this is the part of the URL between the - host name and the start of the repository. For example, root value - '\cb{/pkg}' means the repository URL is \cb{http://example.org/pkg/}. - Specify '\cb{/}' to use the web server root - (\cb{http://example.org/})." - } - - string tenant-name = "tenant" - { - "<name>", - "Name to call the tenant values on web pages. If not specified, then - \cb{tenant} is used." - } - - uint16_t verbosity = 0 - { - "<level>", - "Trace verbosity level. Level 0 disables tracing, which is also the - default." - } - }; - - class openssl_options - { - path openssl = "openssl" - { - "<path>", - "The openssl program to be used for crypto operations. You can also - specify additional options that should be passed to the openssl - program with \cb{openssl-option}. If the openssl program is not - explicitly specified, then \cb{brep} will use \cb{openssl} by - default." - } - - strings openssl-option - { - "<opt>", - "Additional option to be passed to the openssl program (see - \cb{openssl} for details). Repeat this option to specify multiple - openssl options." - } - - strings openssl-envvar - { - "<name>[=value]", - "Environment variable to be set (<name>=<value>) or unset (just - <name>) for the openssl program (see \cb{openssl} for details). - Repeat this option to specify multiple openssl variables. 
Note - that unspecified variables are inherited from the web server - process. - - You need to at least set the \cb{RANDFILE} environment variable - to change the default location of the openssl program seed file - and maybe also the \cb{OPENSSL_CONF} variable if you would like - to use a custom openssl configuration file." - } - }; - - class package_db - { - string package-db-user - { - "<user>", - "Package database login user name. If not specified, then operating - system (login) name is used. See also \c{package-db-role}." - } - - string package-db-role = "brep" - { - "<user>", - "Package database execution user name. If not empty then the login - user will be switched (with \c{SET ROLE}) to this user prior to - executing any statements. If not specified, then \cb{brep} is used." - } - - string package-db-password - { - "<pass>", - "Package database password. If not specified, then login without - password is expected to work." - } - - string package-db-name = "brep_package" - { - "<name>", - "Package database name. If not specified, then \cb{brep_package} is - used by default." - } - - string package-db-host - { - "<host>", - "Package database host name, address, or socket. If not specified, then - connect to \cb{localhost} using the operating system-default - mechanism (Unix-domain socket, etc)." - } - - uint16_t package-db-port = 0 - { - "<port>", - "Package database port number. If not specified, the default port is - used." - } - - size_t package-db-max-connections = 5 - { - "<num>", - "The maximum number of concurrent package database connections per web - server process. If 0, then no limitation is applied. The default is - 5." - } - - size_t package-db-retry = 10 - { - "<num>", - "The maximum number of times to retry package database transactions in - the face of recoverable failures (deadlock, loss of connection, etc). - The default is 10." 
- } - }; - - class build: openssl_options - { - path build-config - { - "<buildtab>", - "Build configuration file. If not specified, then the package building - functionality will be disabled. If specified, then the build database - must be configured (see \cb{build-db-*}). The \cb{brep} instance - needs to be restarted after modifying <buildtab> for the changes to - take effect." - } - - dir_path build-bot-agent-keys - { - "<dir>", - "Directory containing build bot agent public keys. If specified, then - \cb{brep} will perform agent authentication and will reject build - results from unauthenticated ones. If not specified, then build - results are accepted from all agents (which will be a security - risk if the \cb{brep} instance is publicly accessible). - - The directory is expected to contain one PEM-encoded public key - per file with the \cb{.pem} extension. All other files and - subdirectories are ignored. The \cb{brep} instance needs to be - restarted after adding new key files for the changes to take effect." - } - - size_t build-forced-rebuild-timeout = 600 - { - "<seconds>", - "Time to wait before considering a package for a forced rebuild. Must - be specified in seconds. Default is 10 minutes." - } - - size_t build-normal-rebuild-timeout = 86400 - { - "<seconds>", - "Time to wait before considering a package for a normal rebuild. Must - be specified in seconds. Default is 24 hours." - } - }; - - class build_db - { - string build-db-user - { - "<user>", - "Build database login user name. If not specified, then operating - system (login) name is used. See also \c{build-db-role}." - } - - string build-db-role = "brep" - { - "<user>", - "Build database execution user name. If not empty then the login - user will be switched (with \c{SET ROLE}) to this user prior to - executing any statements. If not specified, then \cb{brep} is used." - } - - string build-db-password - { - "<pass>", - "Build database password. 
If not specified, then login without - password is expected to work." - } - - string build-db-name = "brep_build" - { - "<name>", - "Build database name. If not specified, then \cb{brep_build} is used - by default." - } - - string build-db-host - { - "<host>", - "Build database host name, address, or socket. If not specified, then - connect to \cb{localhost} using the operating system-default - mechanism (Unix-domain socket, etc)." - } - - uint16_t build-db-port = 0 - { - "<port>", - "Build database port number. If not specified, the default port is - used." - } - - size_t build-db-max-connections = 5 - { - "<num>", - "The maximum number of concurrent build database connections per web - server process. If 0, then no limitation is applied. The default is - 5." - } - - size_t build-db-retry = 10 - { - "<num>", - "The maximum number of times to retry build database transactions in - the face of recoverable failures (deadlock, loss of connection, etc). - The default is 10." - } - }; - - class page - { - web::xhtml::fragment logo - { - "<xhtml>", - "Web page logo. It is displayed in the page header aligned to the left - edge. The value is treated as an XHTML5 fragment." - } - - vector<page_menu> menu; - { - "<label=link>", - "Web page menu. Each entry is displayed in the page header in the - order specified and aligned to the right edge. A link target that - starts with '\cb{/}' or contains '\cb{:}' is used as is. Otherwise, - it is prefixed with the repository web interface root." - } - }; - - class search - { - uint16_t search-page-entries = 20 - { - "<num>", - "Number of packages per page. The default is 20." - } - - uint16_t search-pages = 5 - { - "<num>", - "Number of pages in navigation (pager). The default is 5." - } - }; - - class package - { - uint16_t package-description = 500 - { - "<len>", - "Number of package description characters to display in brief pages. - The default is 500 (~ 80 characters * 6 lines)." 
- } - - uint16_t package-changes = 5000; - { - "<len>", - "Number of package changes characters to display in brief pages. The - default is 5000 (~ 80 chars x 60 lines)." - } - }; - - // Handler options. - // - - class packages: search, package_db, page, handler - { - string search-title = "Packages" - { - "<text>", - "Package search page title. It is placed inside XHTML5 <title> - element." - } - }; - - class package_details: package, search, package_db, page, handler - { - }; - - class package_version_details: package, package_db, - build, build_db, - page, - handler - { - }; - - class repository_details: package_db, page, handler - { - }; - - class build_task: build, build_db, handler - { - size_t build-task-request-max-size = 102400 - { - "<bytes>", - "The maximum size of the build task request manifest accepted. Note - that the HTTP POST request body is cached to retry database - transactions in the face of recoverable failures (deadlock, loss of - connection, etc). The default is 100K." - } - - size_t build-result-timeout = 10800 - { - "<seconds>", - "Time to wait before considering the expected task result lost. Must be - specified in seconds. The default is 3 hours." - } - }; - - class build_result: build, package_db, build_db, handler - { - size_t build-result-request-max-size = 10240000 - { - "<bytes>", - "The maximum size of the build result manifest accepted. Note that the - HTTP POST request body is cached to retry database transactions in the - face of recoverable failures (deadlock, loss of connection, etc). The - default is 10M." - } - }; - - class build_log: build, build_db, handler - { - }; - - class build_force: build, build_db, handler - { - }; - - class builds: build, build_db, page, handler - { - uint16_t build-page-entries = 20 - { - "<num>", - "Number of builds per page. The default is 20." - } - - uint16_t build-pages = 5 - { - "<num>", - "Number of pages in navigation (pager). The default is 5." 
- } - }; - - class build_configs: build, page, handler - { - uint16_t build-config-page-entries = 20 - { - "<num>", - "Number of build configurations per page. The default is 20." - } - - uint16_t build-config-pages = 5 - { - "<num>", - "Number of pages in navigation (pager). The default is 5." - } - }; - - class submit: page, handler - { - dir_path submit-data - { - "<dir>", - "The directory to save final submission data to. If unspecified, the - package submission functionality will be disabled. If specified, - then \cb{submit-temp} must be specified as well. See \l{brep The - \cb{build2} Repository Interface Manual} for more information on - package submission. - - Note that the directory path must be absolute and the directory - itself must exist and have read, write, and execute permissions - granted to the user that runs the web server." - } - - dir_path submit-temp - { - "<dir>", - "The directory to save temporary submission data to. Must be specified - if the package submission functionality is enabled. - - Note that this directory must be on the same filesystem and satisfy - the same requirements as \cb{submit-data}. It is also the user's - responsibility to clean it up after an unclean web server shutdown." - } - - size_t submit-max-size = 10485760 - { - "<bytes>", - "The maximum size of the submission data accepted. Note that currently - the entire submission request is read into memory. The default is - 10M." - } - - path submit-form - { - "<file>", - "The package submission form fragment. If specified, then its contents - are treated as an XHTML5 fragment that is inserted into the <body> - element of the submission page. If unspecified, then no submission - page will be displayed. Note that the file path must be absolute." - } - - string submit-email - { - "<email>", - "The package submission email. If specified, the submission request - and result manifests will be sent to this address. 
See \l{brep The - \cb{build2} Repository Interface Manual} for more information." - } - - path submit-handler - { - "<path>", - "The handler program to be executed on package submission. The handler - is executed as part of the HTTP request and is passed additional - arguments that can be specified with \cb{submit-handler-argument} - followed by the absolute path to the submission directory. See - \l{brep The \cb{build2} Repository Interface Manual} for more - information. Note that the program path must be absolute." - } - - strings submit-handler-argument - { - "<arg>", - "Additional arguments to be passed to the submission handler program - (see \cb{submit-handler} for details). Repeat this option to specify - multiple arguments." - } - - size_t submit-handler-timeout - { - "<seconds>", - "The submission handler program timeout in seconds. If specified and - the handler does not exit in the allotted time, then it is killed and - its termination is treated as abnormal." - } - }; - - class ci: page, handler - { - dir_path ci-data - { - "<dir>", - "The directory to save CI request data to. If unspecified, the - package CI functionality will be disabled. See \l{brep The - \cb{build2} Repository Interface Manual} for more information on - package CI. - - Note that the directory path must be absolute and the directory - itself must exist and have read, write, and execute permissions - granted to the user that runs the web server." - } - - path ci-form - { - "<file>", - "The package CI form fragment. If specified, then its contents are - treated as an XHTML5 fragment that is inserted into the <body> - element of the CI page. If unspecified, then no CI page will be - displayed. Note that the file path must be absolute." - } - - string ci-email - { - "<email>", - "The package CI email. If specified, the CI request and result - manifests will be sent to this address. See \l{brep The \cb{build2} - Repository Interface Manual} for more information." 
- } - - path ci-handler - { - "<path>", - "The handler program to be executed on CI request. The handler is - executed as part of the HTTP request and is passed additional - arguments that can be specified with \cb{ci-handler-argument} - followed by the absolute path to the CI request directory. See - \l{brep The \cb{build2} Repository Interface Manual} for more - information. Note that the program path must be absolute." - } - - strings ci-handler-argument - { - "<arg>", - "Additional arguments to be passed to the CI handler program (see - \cb{ci-handler} for details). Repeat this option to specify multiple - arguments." - } - - size_t ci-handler-timeout - { - "<seconds>", - "The CI handler program timeout in seconds. If specified and the - handler does not exit in the allotted time, then it is killed and - its termination is treated as abnormal." - } - }; - - class repository_root: handler - { - string root-global-view = "packages" - { - "<service>", - "The default view to display for the global repository root. The - <service> argument is one of the supported services (\c{packages}, - \c{builds}, \c{submit}, \c{ci}, etc). The default service is - packages." - } - - string root-tenant-view = "packages" - { - "<service>" - "The default view to display for the tenant repository root. The - <service> argument is one of the supported services (\c{packages}, - \c{builds}, \c{submit}, \c{ci}, etc). The default service is - packages." - } - }; - } - - // Web handler HTTP request parameters. - // - namespace params - { - // Use parameters long names in the C++ code, short aliases (if present) - // in HTTP URL. - // - class packages - { - // Display package search result list starting from this page. - // - uint16_t page | p; - - // Package search criteria. - // - // Note that the packages parameter is renamed to '_' by the root - // handler (see the request_proxy class for details). 
- // - string q | _; - }; - - class package_details - { - // Display package version search result list starting from this page. - // - uint16_t page | p; - - // Package version search criteria. - // - string query | q; - - // Page form. - // - page_form form | f = page_form::brief; - }; - - class package_version_details - { - // Page form. - // - page_form form | f = page_form::brief; - }; - - class repository_details - { - // No parameters so far. - // - }; - - class build_task - { - // Package repository canonical name (note: including pkg: type). - // - vector<string> repository | r; - }; - - class build_result - { - // No parameters so far. - // - }; - - class build_log - { - // No parameters so far. - // - }; - - // All parameters are non-optional. - // - class build_force - { - // Package name. - // - string package | pn; - - // Package version. May not be url-encoded, in which case the plus - // character is considered literally (rather than as the encoded space - // character). In other words, after url-decoding the space character is - // treated the same way as the plus character. - // - // @@ Make it of the version type? Maybe after it get moved to - // libbpkg/types.hxx or at least the second use case appear. - // - string version | pv; - - // Package build configuration. - // - string configuration | cf; - - // Toolchain name. - // - string toolchain_name | tn; - - // Toolchain version. May not be url-encoded (see above). - // - string toolchain_version | tv; - - // Package rebuild reason. Must not be empty. - // - string reason; - }; - - class builds - { - // Display packages build configurations list starting from this page. - // - uint16_t page | p; - - // Package builds query filter options. - // - - // Package name wildcard. An empty value is treated the same way as *. 
- // - // We used to generate URLs like: - // - // https://cppget.org/?builds&pn=bbot - // - // This looked a bit verbose, so now we produce URLs like: - // - // https://cppget.org/?builds=bbot - // - // To support the already distributed URLs the name_legacy (pn) parameter - // overrides the name (builds) parameter, if present. Note that the - // builds parameter is renamed to '_' by the root handler (see the - // request_proxy class for details). - // - string name | _; - string name_legacy | pn; - - // Package version. If empty or *, then no version constraint is applied. - // Otherwise the build package version must match the value exactly. - // - string version | pv; - - // Package build toolchain in the <name>-<version> form. If *, then no - // toolchain constraint is applied. Otherwise the build toolchain name - // and version must match the value exactly. - // - string toolchain | tc = "*"; - - // Package build configuration name wildcard. An empty value is treated - // the same way as *. - // - string configuration | cf; - - // Package build machine name wildcard. An empty value is treated the - // same way as *. - // - string machine | mn; - - // Package build target wildcard. An empty value is treated the same way - // as *. - // - string target | tg; - - // Package build result. If *, then no build result constraint is - // applied. Otherwise the value is supposed to be the one of the - // following (ordered) statuses: pending, building, success, warning, - // error, abort, abnormal. The first 3 statuses are checked for equality, - // the rest - for being greater or equal. - // - string result | rs = "*"; - }; - - class build_configs - { - // Note that the build-configs parameter is renamed to '_' by the root - // handler (see the request_proxy class for details). - // - string class_name | _ = "all"; - - // Display build configurations list starting from this page. 
- // - uint16_t page | p; - }; - - // Parameters, except simulate, must either be all present (actual - // submission) or absent (submission form request). - // - // Note also that besides these parameters there can be others. We don't - // recognize their semantics and just save them to the submission request - // manifest. - // - class submit - { - // Package archive file name. Must be <input type="file"/>. - // - // Note that it can potentially be not just a name but a file path and - // in the client's form (e.g., Windows). - // - string archive; - - // Package archive file SHA256 checksum. - // - string sha256sum; - - // Submission simulation outcome. - // - string simulate; - }; - - // Parameters, except simulate, must either be all present (actual CI - // request) or absent (CI form request). - // - // Note also that besides these parameters there can be others. We don't - // recognize their semantics and just save them to the CI request - // manifest. - // - class ci - { - // Package repository location. - // - // Note that the ci parameter is renamed to '_' by the root handler (see - // the request_proxy class for details). - // - bpkg::repository_location repository | _; - - // Package names/versions. - // - strings package; - - // Overrides file name. Must be <input type="file"/>. - // - // Note that we don't really need this name and only check if this - // parameter is specified to detect presence of the upload. - // - string overrides; - - // Submission simulation outcome. 
- // - string simulate; - }; - } -} diff --git a/mod/page.cxx b/mod/page.cxx index 64e31c0..c7dc403 100644 --- a/mod/page.cxx +++ b/mod/page.cxx @@ -16,9 +16,10 @@ #include <libbutl/url.mxx> -#include <web/xhtml.hxx> -#include <web/xhtml-fragment.hxx> -#include <web/mime-url-encoding.hxx> +#include <web/xhtml/fragment.hxx> +#include <web/xhtml/serialization.hxx> + +#include <web/server/mime-url-encoding.hxx> #include <libbrep/package.hxx> #include <libbrep/package-odb.hxx> diff --git a/mod/page.hxx b/mod/page.hxx index 8c92d10..49d8608 100644 --- a/mod/page.hxx +++ b/mod/page.hxx @@ -8,7 +8,7 @@ #include <libbbot/manifest.hxx> -#include <web/xhtml-fragment.hxx> +#include <web/xhtml/fragment.hxx> #include <libbrep/types.hxx> #include <libbrep/utility.hxx> diff --git a/mod/services.cxx b/mod/services.cxx index 7739011..b17e32e 100644 --- a/mod/services.cxx +++ b/mod/services.cxx @@ -3,7 +3,7 @@ #include <ap_config.h> // AP_MODULE_DECLARE_DATA -#include <web/apache/service.hxx> +#include <web/server/apache/service.hxx> #include <libbrep/types.hxx> #include <libbrep/utility.hxx> diff --git a/mod/types-parsers.cxx b/mod/types-parsers.cxx index 70d77dd..ceaab29 100644 --- a/mod/types-parsers.cxx +++ b/mod/types-parsers.cxx @@ -3,7 +3,7 @@ #include <mod/types-parsers.hxx> -#include <mod/options.hxx> +#include <mod/module-options.hxx> using namespace std; using namespace bpkg; diff --git a/mod/types-parsers.hxx b/mod/types-parsers.hxx index a81ef90..091c868 100644 --- a/mod/types-parsers.hxx +++ b/mod/types-parsers.hxx @@ -9,7 +9,7 @@ #include <libbpkg/manifest.hxx> // repository_location -#include <web/xhtml-fragment.hxx> +#include <web/xhtml/fragment.hxx> #include <libbrep/types.hxx> #include <libbrep/utility.hxx> diff --git a/monitor/.gitignore b/monitor/.gitignore new file mode 100644 index 0000000..21c0e0b --- /dev/null +++ b/monitor/.gitignore @@ -0,0 +1,2 @@ +*-options.?xx +brep-monitor diff --git a/monitor/buildfile b/monitor/buildfile new file mode 100644 index 
0000000..dc49a98 --- /dev/null +++ b/monitor/buildfile @@ -0,0 +1,45 @@ +# file : monitor/buildfile +# license : MIT; see accompanying LICENSE file + +import libs = libodb%lib{odb} +import libs += libodb-pgsql%lib{odb-pgsql} +import libs += libbutl%lib{butl} +import libs += libbbot%lib{bbot} + +include ../libbrep/ +include ../mod/ + +exe{brep-monitor}: {hxx ixx cxx}{* -*-options} \ + {hxx ixx cxx}{monitor-options module-options} \ + ../mod/libue{mod} ../libbrep/lib{brep} $libs + +# Build options. +# +obj{monitor}: cxx.poptions += -DBREP_COPYRIGHT=\"$copyright\" + +# Generated options parser. +# +if $cli.configured +{ + cli.cxx{monitor-options}: cli{monitor} + cli.cxx{module-options}: cli{module} + + cli.options += --std c++11 -I $src_root --include-with-brackets \ +--include-prefix monitor --guard-prefix MONITOR --generate-specifier \ +--cli-namespace brep::cli + + cli.cxx{monitor-options}: cli.options += \ +--page-usage print_ --ansi-color --long-usage + + cli.cxx{module-options}: cli.options += --suppress-usage --generate-parse + + # Include the generated cli files into the distribution and don't remove + # them when cleaning in src (so that clean results in a state identical to + # distributed). + # + cli.cxx{*}: + { + dist = true + clean = ($src_root != $out_root) + } +} diff --git a/monitor/module.cli b/monitor/module.cli new file mode 100644 index 0000000..c299c5f --- /dev/null +++ b/monitor/module.cli @@ -0,0 +1,16 @@ +// file : monitor/module.cli +// license : MIT; see accompanying LICENSE file + +include <mod/module.cli>; + +namespace brep +{ + namespace options + { + // brep web module configuration options we are interested in. 
+ // + class module: build_task + { + }; + } +} diff --git a/monitor/monitor.cli b/monitor/monitor.cli new file mode 100644 index 0000000..33b05f7 --- /dev/null +++ b/monitor/monitor.cli @@ -0,0 +1,176 @@ +// file : monitor/monitor.cli +// license : MIT; see accompanying LICENSE file + +include <vector>; +include <string>; +include <cstddef>; // size_t +include <cstdint>; // uint16_t + +include <mod/module.cli>; // Reuse CLI support types. + +"\section=1" +"\name=brep-monitor" +"\summary=monitor brep infrastructure" + +namespace brep +{ + namespace options + { + { + "<options> <brep-config> <toolchain> <name> <version>", + + "\h|SYNOPSIS| + + \c{\b{brep-monitor --help}\n + \b{brep-monitor --version}\n + \b{brep-monitor} [<options>] <brep-config> <toolchain> [<toolchain>...]} + + \c{<toolchain> = <name>[\b{/}<version>]} + + \h|DESCRIPTION| + + \cb{brep-monitor} analyzes the \cb{brep} internal state and reports the + infrastructure issues printing their descriptions to \cb{stderr}. + + The specified \cb{brep} configuration file (<brep-config>) is used to + retrieve information required to access the databases and deduce the + expected behavior. Most of this information can be overridden via the + command line options. + + Currently, only delayed package builds for the specified toolchains are + reported. If toolchain version is omitted then all package builds with + this toolchain name are considered. + + \cb{brep-monitor} maintains its own state in the brep \cb{build} + database. In particular, it records timestamps of the reported package + build delays and optionally omits them from being reported again during + the timeout specified with the \cb{--report-timeout} option. + + By default, a brief report is printed. Use the \cb{--full-report} + option to obtain the full report (which may be large). + + Note that \cb{brep-monitor} expects the \cb{build} database schema to + have already been created using \l{brep-migrate(1)}." 
+ } + + class monitor + { + "\h|OPTIONS|" + + std::size_t --build-timeout + { + "<seconds>", + "Time to wait (in seconds) before considering a package build as + delayed. If unspecified, the sum of \cb{brep}'s + \cb{build-normal-rebuild-timeout} and \cb{build-result-timeout} + configuration option values is used. Note also that an archived + package that is unbuilt is always considered delayed." + } + + std::size_t --report-timeout + { + "<seconds>", + "Time to wait (in seconds) before repeating a report of a package + build delay. By default there is no delay and all reports are + repeated." + } + + bool --full-report + { + "Print the list of delayed package builds rather than just their number + per build configuration." + } + + bool --clean + { + "Additionally clean the monitor state removing outdated information + related to non-existent packages, configurations, etc." + } + + // Note that the web service would normally log in under a different + // user (and potentially switch the role afterwards) and so falling back + // to brep's user name and password wouldn't make much sense. + // + std::string --build-db-user|-u + { + "<user>", + "\cb{build} database user name. If unspecified, then operating system + (login) name is used." + } + + std::string --build-db-password + { + "<pass>", + "\cb{build} database password. If unspecified, then login without + password is expected to work." + } + + std::string --build-db-name|-n = "brep_package" + { + "<name>", + "\cb{build} database name. If unspecified, then \cb{brep}'s + \cb{build-db-name} configuration option value is used." + } + + std::string --build-db-host|-h + { + "<host>", + "\cb{build} database host name, address, or socket. If unspecified, + then \cb{brep}'s \cb{build-db-host} configuration option value is + used." + } + + std::uint16_t --build-db-port|-p + { + "<port>", + "\cb{build} database port number. If unspecified, then \cb{brep}'s + \cb{build-db-port} configuration option value is used." 
+ } + + std::string --pager // String to allow empty value. + { + "<path>", + "The pager program to be used to show long text. Commonly used pager + programs are \cb{less} and \cb{more}. You can also specify additional + options that should be passed to the pager program with + \cb{--pager-option}. If an empty string is specified as the pager + program, then no pager will be used. If the pager program is not + explicitly specified, then \cb{brep-monitor} will try to use + \cb{less}. If it is not available, then no pager will be used." + } + + std::vector<std::string> --pager-option + { + "<opt>", + "Additional option to be passed to the pager program. See \cb{--pager} + for more information on the pager program. Repeat this option to + specify multiple pager options." + } + + bool --help {"Print usage information and exit."} + bool --version {"Print version and exit."} + }; + + "\h|EXIT STATUS| + + \dl| + + \li|\cb{0} + + Success.| + + \li|\cb{1} + + Fatal error.| + + \li|\cb{2} + + An instance of \cb{brep-monitor} or some other \cb{brep} utility is + already running. Try again.| + + \li|\cb{3} + + Recoverable database error. 
Try again.|| + " + } +} diff --git a/monitor/monitor.cxx b/monitor/monitor.cxx new file mode 100644 index 0000000..e04c5e1 --- /dev/null +++ b/monitor/monitor.cxx @@ -0,0 +1,766 @@ +// file : monitor/monitor.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <map> +#include <set> +#include <chrono> +#include <iostream> +#include <algorithm> // find_if() + +#include <odb/database.hxx> +#include <odb/transaction.hxx> +#include <odb/schema-catalog.hxx> + +#include <odb/pgsql/database.hxx> + +#include <libbutl/pager.mxx> +#include <libbutl/utility.mxx> // compare_c_string + +#include <libbbot/build-config.hxx> + +#include <libbrep/build.hxx> +#include <libbrep/common.hxx> +#include <libbrep/build-odb.hxx> +#include <libbrep/build-package.hxx> +#include <libbrep/build-package-odb.hxx> +#include <libbrep/database-lock.hxx> + +#include <mod/build-config.hxx> + +#include <monitor/module-options.hxx> +#include <monitor/monitor-options.hxx> + +using namespace std; +using namespace butl; +using namespace bbot; +using namespace odb::core; + +namespace brep +{ + // Operation failed, diagnostics has already been issued. + // + struct failed {}; + + static const char* help_info ( + " info: run 'brep-monitor --help' for more information"); + + static int + main (int argc, char* argv[]) + try + { + cli::argv_scanner scan (argc, argv); + options::monitor ops (scan); + + // Version. + // + if (ops.version ()) + { + cout << "brep-monitor " << BREP_VERSION_ID << endl + << "libbrep " << LIBBREP_VERSION_ID << endl + << "libbbot " << LIBBBOT_VERSION_ID << endl + << "libbpkg " << LIBBPKG_VERSION_ID << endl + << "libbutl " << LIBBUTL_VERSION_ID << endl + << "Copyright (c) " << BREP_COPYRIGHT << "." << endl + << "This is free software released under the MIT license." << endl; + + return 0; + } + + // Help. + // + if (ops.help ()) + { + pager p ("brep-monitor help", + false, + ops.pager_specified () ? 
&ops.pager () : nullptr, + &ops.pager_option ()); + + print_usage (p.stream ()); + + // If the pager failed, assume it has issued some diagnostics. + // + return p.wait () ? 0 : 1; + } + + // Parse the brep module configuration. + // + options::module mod_ops; + { + if (!scan.more ()) + { + cerr << "error: brep module configuration file is expected" << endl + << help_info << endl; + return 1; + } + + string f (scan.next ()); + + try + { + cli::argv_file_scanner scan (f, "" /* option */); + + // Parse the brep module options skipping those we don't recognize. + // + while (scan.more ()) + { + // Parse until an unknown option is encountered. + // + mod_ops.parse (scan, + cli::unknown_mode::stop, + cli::unknown_mode::stop); + + // Skip the unknown option, unless we are done. + // + if (scan.more ()) + { + // Skip the option name. + // + size_t l (scan.peek_line ()); + scan.skip (); + + // Skip the option value, if present. + // + // Note that here we rely on the configuration file having both + // the option name and its value on the same line. + // + if (scan.more () && scan.peek_line () == l) + scan.skip (); + } + } + } + catch (const cli::file_io_failure& e) + { + cerr << "error: unable to parse brep module configuration: " << e + << endl; + return 1; + } + catch (const cli::exception& e) + { + cerr << "error: unable to parse brep module configuration file '" << f + << "': " << e << endl; + return 1; + } + } + + if (!mod_ops.build_config_specified ()) + { + cerr << "warning: package building functionality is disabled" << endl; + return 0; + } + + // Parse the toolchains suppressing duplicates. + // + // Note that specifying a toolchain both with and without version doesn't + // make sense, so we fail if that's the case. 
+ // + vector<pair<string, version>> toolchains; + + if (!scan.more ()) + { + cerr << "error: toolchain is expected" << endl << help_info << endl; + return 1; + } + + while (scan.more ()) + { + string s (scan.next ()); + + string tn; + version tv; + + try + { + size_t p (s.find ('/')); + + if (p == string::npos) + tn = move (s); + else + { + tn.assign (s, 0, p); + tv = version (string (s, p + 1)); + } + + bool dup (false); + for (const pair<string, version>& t: toolchains) + { + if (tn == t.first) + { + if (tv == t.second) + { + dup = true; + break; + } + + if (tv.empty () != t.second.empty ()) + { + cerr << "error: toolchain '" << tn << "' is specified both " + << "with and without version" << endl; + return 1; + } + } + } + + if (!dup) + toolchains.emplace_back (move (tn), move (tv)); + } + catch (const invalid_argument& e) + { + cerr << "error: invalid toolchain '" << s << "': " << e << endl; + return 1; + } + } + + // Parse buildtab. + // + build_configs configs; + + try + { + configs = parse_buildtab (mod_ops.build_config ()); + } + catch (const tab_parsing& e) + { + cerr << "error: unable to parse buildtab: " << e << endl; + return 1; + } + catch (const io_error& e) + { + cerr << "error: unable to read '" << mod_ops.build_config () << "': " + << e << endl; + return 1; + } + + // Create the database instance. + // + odb::pgsql::database db ( + ops.build_db_user (), + ops.build_db_password (), + (ops.build_db_name_specified () + ? ops.build_db_name () + : mod_ops.build_db_name ()), + (ops.build_db_host_specified () + ? ops.build_db_host () + : mod_ops.build_db_host ()), + (ops.build_db_port_specified () + ? ops.build_db_port () + : mod_ops.build_db_port ()), + "options='-c default_transaction_isolation=serializable'"); + + // Prevent several brep utility instances from updating the build database + // simultaneously. + // + database_lock l (db); + + // Check that the database schema matches the current one. 
+ // + const string ds ("build"); + if (schema_catalog::current_version (db, ds) != db.schema_version (ds)) + { + cerr << "error: build database schema differs from the current one" + << endl + << " info: use brep-migrate to migrate the database" << endl; + return 1; + } + + // If requested, cleanup delays for package builds that are not expected + // anymore (build configuration is not present, etc). + // + if (ops.clean ()) + { + using config_map = map<const char*, + const build_config*, + compare_c_string>; + + config_map conf_map; + for (const build_config& c: configs) + conf_map[c.name.c_str ()] = &c; + + // Prepare the build delay prepared query. + // + // Query package build delays in chunks in order not to hold locks for + // too long. Sort the result by package version as a first priority to + // minimize number of queries to the package database. Note that we + // still need to sort by configuration and toolchain to make sure that + // build delays are sorted consistently across queries and we don't miss + // any of them. + // + using query = query<build_delay>; + using prep_query = prepared_query<build_delay>; + + // Specify the portion. + // + size_t offset (0); + + query q ("ORDER BY" + + query::id.package.tenant + "," + + query::id.package.name + + order_by_version (query::id.package.version, + false /* first */) + "," + + query::id.configuration + "," + + query::id.toolchain_name + + order_by_version (query::id.toolchain_version, + false /* first */) + + "OFFSET" + query::_ref (offset) + "LIMIT 100"); + + connection_ptr conn (db.connection ()); + + prep_query pq ( + conn->prepare_query<build_delay> ("build-delay-query", q)); + + // Cache the delayed build package object to reuse it in case the next + // delay refers to the same package (which is often the case due to the + // query result sorting criteria we use). 
+ // + package_id pid; + shared_ptr<build_package> p; + + for (bool ne (true); ne; ) + { + transaction t (conn->begin ()); + + // Query delays. + // + auto delays (pq.execute ()); + + if ((ne = !delays.empty ())) + { + // Iterate over the build delays and cleanup the outdated ones. + // + for (const build_delay& d: delays) + { + config_map::const_iterator ci; + + bool cleanup ( + // Check that the toolchain is still used. + // + find_if (toolchains.begin (), toolchains.end (), + [&d] (const pair<string, version>& t) + { + return t.first == d.toolchain_name && + t.second == d.toolchain_version; + }) == toolchains.end () || + // + // Check that the build configuration is still present. + // + (ci = conf_map.find (d.configuration.c_str ())) == + conf_map.end ()); + + // Check that the package is still present, is buildable and doesn't + // exclude the build configuration. + // + if (!cleanup) + { + if (d.id.package != pid) + { + pid = d.id.package; + p = db.find<build_package> (pid); + } + + cleanup = (p == nullptr || + !p->buildable || + exclude (p->builds, + p->constraints, + *ci->second, + configs.class_inheritance_map)); + } + + if (cleanup) + db.erase (d); + else + ++offset; + } + } + + t.commit (); + } + } + + // Collect and report delays as separate steps not to hold database locks + // while printing to stderr. Also we need to properly order delays for + // printing. + // + // Iterate through all possible package builds creating the list of delays + // with the following sort priority: + // + // 1: toolchain name + // 2: toolchain version (descending) + // 3: configuration name + // 4: tenant + // 5: package name + // 6: package version (descending) + // + // Such ordering will allow us to group build delays by toolchain and + // configuration while printing the report.
+ // + struct compare_delay + { + bool + operator() (const shared_ptr<const build_delay>& x, + const shared_ptr<const build_delay>& y) const + { + if (int r = x->toolchain_name.compare (y->toolchain_name)) + return r < 0; + + if (int r = x->toolchain_version.compare (y->toolchain_version)) + return r > 0; + + if (int r = x->configuration.compare (y->configuration)) + return r < 0; + + if (int r = x->tenant.compare (y->tenant)) + return r < 0; + + if (int r = x->package_name.compare (y->package_name)) + return r < 0; + + return x->package_version.compare (y->package_version) > 0; + } + }; + + set<shared_ptr<const build_delay>, compare_delay> delays; + { + connection_ptr conn (db.connection ()); + + // Prepare the buildable package prepared query. + // + // Query buildable packages in chunks in order not to hold locks for + // too long. + // + using pquery = query<buildable_package>; + using prep_pquery = prepared_query<buildable_package>; + + // Specify the portion. + // + size_t offset (0); + + pquery pq ("ORDER BY" + + pquery::build_package::id.tenant + "," + + pquery::build_package::id.name + + order_by_version (pquery::build_package::id.version, + false /* first */) + + "OFFSET" + pquery::_ref (offset) + "LIMIT 50"); + + prep_pquery ppq ( + conn->prepare_query<buildable_package> ("buildable-package-query", + pq)); + + // Prepare the package build prepared query. + // + // This query will only be used for toolchains that have no version + // specified on the command line to obtain the latest build across all + // toolchain versions. 
+ // + using bquery = query<package_build>; + using prep_bquery = prepared_query<package_build>; + + build_id id; + const auto& bid (bquery::build::id); + + bquery bq ((equal<package_build> (bid.package, id.package) && + bid.configuration == bquery::_ref (id.configuration) && + bid.toolchain_name == bquery::_ref (id.toolchain_name)) + + "ORDER BY" + bquery::build::timestamp + "DESC" + "LIMIT 1"); + + prep_bquery pbq ( + conn->prepare_query<package_build> ("package-build-query", bq)); + + timestamp::duration build_timeout ( + ops.build_timeout_specified () + ? chrono::seconds (ops.build_timeout ()) + : chrono::seconds (mod_ops.build_normal_rebuild_timeout () + + mod_ops.build_result_timeout ())); + + timestamp now (system_clock::now ()); + + timestamp build_expiration (now - build_timeout); + + timestamp report_expiration ( + now - chrono::seconds (ops.report_timeout ())); + + for (bool ne (true); ne; ) + { + transaction t (conn->begin ()); + + // Query buildable packages (and cache the result). + // + auto bps (ppq.execute ()); + + if ((ne = !bps.empty ())) + { + offset += bps.size (); + + for (auto& bp: bps) + { + shared_ptr<build_package> p (db.load<build_package> (bp.id)); + + for (const build_config& c: configs) + { + if (exclude (p->builds, + p->constraints, + c, + configs.class_inheritance_map)) + continue; + + for (const pair<string, version>& t: toolchains) + { + id = build_id (p->id, c.name, t.first, t.second); + + // If the toolchain version is not specified then search for + // the latest build across all toolchain versions and search + // for a specific build otherwise. + // + shared_ptr<build> b; + + if (id.toolchain_version.empty ()) + { + auto pbs (pbq.execute ()); + + if (!pbs.empty ()) + b = move (pbs.begin ()->build); + } + else + b = db.find<build> (id); + + // Note that we consider a build as delayed if it is not + // completed in the expected timeframe. 
So even if the build + // task has been issued recently we may still consider the + // build as delayed. + // + timestamp bct (b != nullptr + ? b->completion_timestamp + : timestamp_nonexistent); + + // Create the delay object to record a timestamp when the + // package build could have potentially been started, unless + // it already exists. + // + shared_ptr<build_delay> d (db.find<build_delay> (id)); + + if (d == nullptr) + { + // If the archived package has no build nor build delay + // for this configuration, then we assume that the + // configuration was added after the package tenant has + // been archived and so the package could have never been + // built for this configuration. Thus, we don't consider + // this build as delayed and so skip it. + // + if (bp.archived && b == nullptr) + continue; + + // Use the build completion or build status change + // timestamp, whichever is earlier, as the build delay + // tracking starting point and fallback to the current time + // if there is no build yet. + // + timestamp pts ( + b == nullptr ? now : + bct != timestamp_nonexistent && bct < b->timestamp ? bct : + b->timestamp); + + d = make_shared<build_delay> (move (id.package.tenant), + move (id.package.name), + p->version, + move (id.configuration), + move (id.toolchain_name), + t.second, + pts); + db.persist (d); + } + + // Handle package builds differently based on their tenant's + // archive status. + // + // If the package is not archived then consider it as delayed + // if it is not (re-)built by the expiration time. Otherwise, + // consider it as delayed if it is unbuilt. + // + bool delayed; + + if (!bp.archived) + { + timestamp bts (bct != timestamp_nonexistent + ? bct + : d->package_timestamp); + + delayed = (bts <= build_expiration); + } + else + delayed = (bct == timestamp_nonexistent); + + // If the report timeout is not specified then report the + // delay unconditionally.
Otherwise, report the active package + // build delay if the report timeout is expired and the + // archived package build delay if it was never reported. Note + // that fixing the building infrastructure won't help building + // an archived package, so reporting its build delays + // repeatedly is meaningless. + // + if (delayed && + (!ops.report_timeout_specified () || + (!bp.archived + ? d->report_timestamp <= report_expiration + : d->report_timestamp == timestamp_nonexistent))) + { + // Note that we update the delay objects persistent state + // later, after we successfully print the report. + // + d->report_timestamp = now; + delays.insert (move (d)); + } + } + } + } + } + + t.commit (); + } + } + + // Report package build delays, if any. + // + if (!delays.empty ()) + try + { + // Print the report. + // + cerr.exceptions (ostream::badbit | ostream::failbit); + + cerr << "Package build delays (" << delays.size () << "):" << endl; + + // Group the printed delays by toolchain and configuration. + // + const string* toolchain_name (nullptr); + const version* toolchain_version (nullptr); + const string* configuration (nullptr); + + // Print the delayed package build number per configuration rather than + // the packages themselves in the brief report mode. + // + size_t config_build_count (0); + + auto brief_config = [&configuration, &config_build_count] () + { + if (configuration != nullptr) + { + cerr << " " << *configuration << " (" << config_build_count + << ")" << endl; + + config_build_count = 0; + } + }; + + for (shared_ptr<const build_delay> d: delays) + { + // Print the toolchain, if changed. 
+ // + if (toolchain_name == nullptr || + d->toolchain_name != *toolchain_name || + d->toolchain_version != *toolchain_version) + { + if (!ops.full_report ()) + brief_config (); + + if (toolchain_name != nullptr) + cerr << endl; + + cerr << " " << d->toolchain_name; + + if (!d->toolchain_version.empty ()) + cerr << "/" << d->toolchain_version; + + cerr << endl; + + toolchain_name = &d->toolchain_name; + toolchain_version = &d->toolchain_version; + configuration = nullptr; + } + + // Print the configuration, if changed. + // + if (configuration == nullptr || d->configuration != *configuration) + { + if (ops.full_report ()) + { + if (configuration != nullptr) + cerr << endl; + + cerr << " " << d->configuration << endl; + } + else + brief_config (); + + configuration = &d->configuration; + } + + // Print the delayed build package in the full report mode and count + // configuration builds otherwise. + // + if (ops.full_report ()) + { + // We can potentially extend this information with the archived flag + // or the delay duration. + // + cerr << " " << d->package_name << "/" << d->package_version; + + if (!d->tenant.empty ()) + cerr << " " << d->tenant; + + cerr << endl; + } + else + ++config_build_count; + } + + if (!ops.full_report ()) + brief_config (); + + // Persist the delay report timestamps. + // + transaction t (db.begin ()); + + for (shared_ptr<const build_delay> d: delays) + db.update (d); + + t.commit (); + } + catch (const io_error&) + { + return 1; // Not much we can do on stderr writing failure. + } + + return 0; + } + catch (const database_locked&) + { + cerr << "brep-monitor or some other brep utility is running" << endl; + return 2; + } + catch (const recoverable& e) + { + cerr << "recoverable database error: " << e << endl; + return 3; + } + catch (const cli::exception& e) + { + cerr << "error: " << e << endl << help_info << endl; + return 1; + } + catch (const failed&) + { + return 1; // Diagnostics has already been issued. 
+ } + // Fully qualified to avoid ambiguity with odb exception. + // + catch (const std::exception& e) + { + cerr << "error: " << e << endl; + return 1; + } +} + +int +main (int argc, char* argv[]) +{ + return brep::main (argc, argv); +} diff --git a/tests/web/xhtml/buildfile b/tests/web/xhtml/buildfile index 6ddd5ae..ff683b9 100644 --- a/tests/web/xhtml/buildfile +++ b/tests/web/xhtml/buildfile @@ -1,7 +1,7 @@ # file : tests/web/xhtml/buildfile # license : MIT; see accompanying LICENSE file -include ../../../web/ +include ../../../web/xhtml/ -exe{driver}: {hxx cxx}{*} ../../../web/libus{web} +exe{driver}: {hxx cxx}{*} ../../../web/xhtml/libue{xhtml} exe{driver}: file{test.out}: test.stdout = true diff --git a/tests/web/xhtml/driver.cxx b/tests/web/xhtml/driver.cxx index ff554e4..a0135de 100644 --- a/tests/web/xhtml/driver.cxx +++ b/tests/web/xhtml/driver.cxx @@ -6,7 +6,7 @@ #include <libstudxml/serializer.hxx> -#include <web/xhtml.hxx> +#include <web/xhtml/serialization.hxx> using namespace std; using namespace xml; diff --git a/web/.gitignore b/web/.gitignore deleted file mode 100644 index 426db9e..0000000 --- a/web/.gitignore +++ /dev/null @@ -1 +0,0 @@ -version.hxx diff --git a/web/apache/log.hxx b/web/apache/log.hxx deleted file mode 100644 index 6609190..0000000 --- a/web/apache/log.hxx +++ /dev/null @@ -1,80 +0,0 @@ -// file : web/apache/log.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_APACHE_LOG_HXX -#define WEB_APACHE_LOG_HXX - -#include <httpd.h> // request_rec, server_rec -#include <http_log.h> -#include <http_config.h> // module - -#include <cstdint> // uint64_t -#include <algorithm> // min() - -#include <web/module.hxx> - -namespace web -{ - namespace apache - { - class log: public web::log - { - public: - - log (server_rec* s, const ::module* m) noexcept - : server_ (s), module_ (m) {} - - virtual void - write (const char* msg) {write (APLOG_ERR, msg);} - - // Apache-specific interface. 
- // - void - write (int level, const char* msg) const noexcept - { - write (nullptr, 0, nullptr, level, msg); - } - - void - write (const char* file, - std::uint64_t line, - const char* func, - int level, - const char* msg) const noexcept - { - if (file && *file) - file = nullptr; // Skip file/line placeholder from log line. - - level = std::min (level, APLOG_TRACE8); - - if (func) - ap_log_error (file, - line, - module_->module_index, - level, - 0, - server_, - "[%s]: %s", - func, - msg); - else - // Skip function name placeholder from log line. - // - ap_log_error (file, - line, - module_->module_index, - level, - 0, - server_, - ": %s", - msg); - } - - private: - server_rec* server_; - const ::module* module_; // Apache module. - }; - } -} - -#endif // WEB_APACHE_LOG_HXX diff --git a/web/apache/request.cxx b/web/apache/request.cxx deleted file mode 100644 index 4722b7f..0000000 --- a/web/apache/request.cxx +++ /dev/null @@ -1,1005 +0,0 @@ -// file : web/apache/request.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <web/apache/request.hxx> - -#include <apr.h> // APR_SIZE_MAX -#include <apr_errno.h> // apr_status_t, APR_SUCCESS, APR_E*, apr_strerror() -#include <apr_tables.h> // apr_table_*, apr_table_*(), apr_array_header_t -#include <apr_strings.h> // apr_pstrdup() -#include <apr_buckets.h> // apr_bucket*, apr_bucket_*(), apr_brigade_*(), - // APR_BRIGADE_*() - -#include <httpd.h> // request_rec, HTTP_*, OK -#include <http_protocol.h> // ap_*() - -#include <apreq2/apreq.h> // APREQ_* -#include <apreq2/apreq_util.h> // apreq_brigade_copy() -#include <apreq2/apreq_param.h> // apreq_param_t, apreq_value_to_param() -#include <apreq2/apreq_parser.h> // apreq_parser_t, apreq_parser_make() - -#include <ctime> // strftime(), time_t -#include <vector> -#include <chrono> -#include <memory> // unique_ptr -#include <string> -#include <cassert> -#include <ostream> -#include <istream> -#include <cstring> // str*(), memcpy(), size_t -#include 
<utility> // move() -#include <iterator> // istreambuf_iterator -#include <stdexcept> // invalid_argument, runtime_error -#include <exception> // current_exception() -#include <streambuf> -#include <algorithm> // min() - -#include <libbutl/utility.mxx> // icasecmp() -#include <libbutl/optional.mxx> -#include <libbutl/timestamp.mxx> - -#include <web/mime-url-encoding.hxx> - -using namespace std; -using namespace butl; - -namespace web -{ - namespace apache - { - [[noreturn]] static void - throw_internal_error (apr_status_t s, const string& what) - { - char buf[1024]; - throw runtime_error (what + ": " + apr_strerror (s, buf, sizeof (buf))); - } - - // Extend the Apache stream with checking for the read limit and caching - // the content if requested. Replay the cached content after rewind. - // - class istreambuf_cache: public istreambuf - { - enum class mode - { - cache, // Read from Apache stream, save the read data into the cache. - replay, // Read from the cache. - proxy // Read from Apache stream (don't save into the cache). - }; - - public: - istreambuf_cache (size_t read_limit, size_t cache_limit, - request_rec* r, - stream_state& s, - size_t bufsize = 1024, - size_t putback = 1) - : istreambuf (r, s, bufsize, putback), - read_limit_ (read_limit), - cache_limit_ (cache_limit) - { - } - - void - rewind () - { - // Fail if some content is already missed in the cache. - // - if (mode_ == mode::proxy) - throw sequence_error ( - string ("web::apache::istreambuf_cache::rewind: ") + - (cache_limit_ > 0 - ? "half-buffered" - : "unbuffered")); - - mode_ = mode::replay; - replay_pos_ = 0; - setg (nullptr, nullptr, nullptr); - } - - void - limits (size_t read_limit, size_t cache_limit) - { - if (read_limit > 0) - read_limit_ = read_limit; - - if (cache_limit > 0) - { - // We can not increase the cache limit if some content is already - // missed in the cache. 
- // - if (cache_limit > cache_limit_ && mode_ == mode::proxy) - throw sequence_error ( - "web::apache::istreambuf_cache::limits: unbuffered"); - - cache_limit_ = cache_limit; - } - } - - size_t read_limit () const noexcept {return read_limit_;} - size_t cache_limit () const noexcept {return cache_limit_;} - - private: - virtual int_type - underflow (); - - private: - // Limits - // - size_t read_limit_; - size_t cache_limit_; - - // State - // - mode mode_ = mode::cache; - size_t read_bytes_ = 0; - bool eof_ = false; // End of Apache stream is reached. - - // Cache - // - struct chunk - { - vector<char> data; - size_t offset; - - chunk (vector<char>&& d, size_t o): data (move (d)), offset (o) {} - - // Make the type move constructible-only to avoid copying of chunks on - // vector growth. - // - chunk (chunk&&) = default; - }; - - vector<chunk> cache_; - size_t cache_size_ = 0; - size_t replay_pos_ = 0; - }; - - istreambuf_cache::int_type istreambuf_cache:: - underflow () - { - if (gptr () < egptr ()) - return traits_type::to_int_type (*gptr ()); - - if (mode_ == mode::replay) - { - if (replay_pos_ < cache_.size ()) - { - chunk& ch (cache_[replay_pos_++]); - char* p (ch.data.data ()); - setg (p, p + ch.offset, p + ch.data.size ()); - return traits_type::to_int_type (*gptr ()); - } - - // No more data to replay, so switch to the cache mode. That includes - // resetting eback, gptr and egptr, so they point into the istreambuf's - // internal buffer. Putback area should also be restored. - // - mode_ = mode::cache; - - // Bail out if the end of stream is reached. - // - if (eof_) - return traits_type::eof (); - - char* p (buf_.data () + putback_); - size_t pb (0); - - // Restore putback area if there is any cached data. Thanks to - // istreambuf, it's all in a single chunk. 
- // - if (!cache_.empty ()) - { - chunk& ch (cache_.back ()); - pb = min (putback_, ch.data.size ()); - memcpy (p - pb, ch.data.data () + ch.data.size () - pb, pb); - } - - setg (p - pb, p, p); - } - - // Delegate reading to the base class in the cache or proxy modes, but - // check for the read limit first. - // - if (read_limit_ && read_bytes_ >= read_limit_) - throw invalid_request (HTTP_REQUEST_ENTITY_TOO_LARGE, - "payload too large"); - - // Throws the sequence_error exception if some unbuffered content is - // already written. - // - int_type r (istreambuf::underflow ()); - - if (r == traits_type::eof ()) - { - eof_ = true; - return r; - } - - // Increment the read bytes counter. - // - size_t rb (egptr () - gptr ()); - read_bytes_ += rb; - - // In the cache mode save the read data if the cache limit is not - // reached, otherwise switch to the proxy mode. - // - if (mode_ == mode::cache) - { - // Not to complicate things we will copy the buffer into the cache - // together with the putback area, which is OK as it usually takes a - // small fraction of the buffer. By the same reason we will cache the - // whole data read even though we can exceed the limits by - // bufsize - putback - 1 bytes. - // - if (cache_size_ < cache_limit_) - { - chunk ch (vector<char> (eback (), egptr ()), - static_cast<size_t> (gptr () - eback ())); - - cache_.emplace_back (move (ch)); - cache_size_ += rb; - } - else - mode_ = mode::proxy; - } - - return r; - } - - // Stream interface for reading from the Apache's bucket brigade. Put back - // is not supported. - // - // Note that reading from a brigade bucket modifies the brigade in the - // general case. For example, reading from a file bucket adds a new heap - // bucket before the file bucket on every read. Traversing/reading through - // such a bucket brigade effectively loads the whole file into the memory, - // so the subsequent brigade traversal results in iterating over the - // loaded heap buckets. 
- // - // To avoid such a behavior we will make a shallow copy of the original - // bucket brigade, initially and for each rewind. Then, instead of - // iterating, we will always read from the first bucket removing it after - // the use. - // - class istreambuf_buckets: public streambuf - { - public: - // The bucket brigade must exist during the object's lifetime. - // - explicit - istreambuf_buckets (const apr_bucket_brigade* bs) - : orig_buckets_ (bs), - buckets_ (apr_brigade_create (bs->p, bs->bucket_alloc)) - - { - if (buckets_ == nullptr) - throw_internal_error (APR_ENOMEM, "apr_brigade_create"); - - rewind (); // Copy the original buckets. - } - - void - rewind () - { - // Note that apreq_brigade_copy() appends buckets to the destination, - // so we clean it up first. - // - apr_status_t r (apr_brigade_cleanup (buckets_.get ())); - if (r != APR_SUCCESS) - throw_internal_error (r, "apr_brigade_cleanup"); - - r = apreq_brigade_copy ( - buckets_.get (), - const_cast<apr_bucket_brigade*> (orig_buckets_)); - - if (r != APR_SUCCESS) - throw_internal_error (r, "apreq_brigade_copy"); - - setg (nullptr, nullptr, nullptr); - } - - private: - virtual int_type - underflow () - { - if (gptr () < egptr ()) - return traits_type::to_int_type (*gptr ()); - - // If the get-pointer is not NULL then it points to the data referred - // by the first brigade bucket. As we will bail out or rewrite such a - // pointer now there is no need for the bucket either, so we can - // safely delete it. - // - if (gptr () != nullptr) - { - assert (!APR_BRIGADE_EMPTY (buckets_)); - - // Note that apr_bucket_delete() is a macro and the following - // call ends up badly (with SIGSEGV). 
- // - // apr_bucket_delete (APR_BRIGADE_FIRST (buckets_)); - // - apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); - apr_bucket_delete (b); - } - - if (APR_BRIGADE_EMPTY (buckets_)) - return traits_type::eof (); - - apr_size_t n; - const char* d; - apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); - apr_status_t r (apr_bucket_read (b, &d, &n, APR_BLOCK_READ)); - - if (r != APR_SUCCESS) - throw_internal_error (r, "apr_bucket_read"); - - char* p (const_cast<char*> (d)); - setg (p, p, p + n); - return traits_type::to_int_type (*gptr ()); - } - - private: - const apr_bucket_brigade* orig_buckets_; - - struct brigade_deleter - { - void operator() (apr_bucket_brigade* p) const - { - if (p != nullptr) - { - apr_status_t r (apr_brigade_destroy (p)); - - // Shouldn't fail unless something is severely damaged. - // - assert (r == APR_SUCCESS); - } - } - }; - - unique_ptr<apr_bucket_brigade, brigade_deleter> buckets_; - }; - - class istream_buckets_base - { - public: - explicit - istream_buckets_base (const apr_bucket_brigade* bs): buf_ (bs) {} - - protected: - istreambuf_buckets buf_; - }; - - class istream_buckets: public istream_buckets_base, public istream - { - public: - explicit - istream_buckets (const apr_bucket_brigade* bs) - // Note that calling dtor for istream object before init() is called - // is undefined behavior. That's the reason for inventing the - // istream_buckets_base class. - // - : istream_buckets_base (bs), istream (&buf_) - { - exceptions (failbit | badbit); - } - - void - rewind () - { - buf_.rewind (); - clear (); // Clears *bit flags (in particular eofbit). - } - }; - - // request - // - request:: - request (request_rec* rec) noexcept - : rec_ (rec) - { - rec_->status = HTTP_OK; - } - - request:: - ~request () - { - } - - void request:: - state (request_state s) - { - assert (s != request_state::initial); - - if (s == state_) - return; // Noop. - - if (s < state_) - { - // Can't "unwind" irrevocable interaction with Apache API. 
- // - static const char* names[] = { - "initial", "reading", "headers", "writing"}; - - string str ("web::apache::request::set_state: "); - str += names[static_cast<size_t> (state_)]; - str += " to "; - str += names[static_cast<size_t> (s)]; - - throw sequence_error (move (str)); - } - - if (s == request_state::reading) - { - // Prepare request content for reading. - // - int r (ap_setup_client_block (rec_, REQUEST_CHUNKED_DECHUNK)); - - if (r != OK) - throw invalid_request (r); - } - else if (s > request_state::reading && state_ <= request_state::reading) - { - // Read request content if any, discard whatever is received. - // - int r (ap_discard_request_body (rec_)); - - if (r != OK) - throw invalid_request (r); - } - - state_ = s; - } - - void request:: - rewind () - { - // @@ Response cookies buffering is not supported yet. When done will be - // possible to rewind in broader range of cases. - // - if (state_ > request_state::reading) - throw sequence_error ("web::apache::request::rewind: unbuffered"); - - out_.reset (); - out_buf_.reset (); - - rec_->status = HTTP_OK; - - ap_set_content_type (rec_, nullptr); // Unset the output content type. - - // We don't need to rewind the input stream (which well may fail if - // unbuffered) if the form data is already read. - // - if (in_ != nullptr && form_data_ == nullptr) - { - assert (in_buf_ != nullptr); - - in_buf_->rewind (); // Throws if impossible to rewind. - in_->clear (); // Clears *bit flags (in particular eofbit). - } - - // Rewind uploaded file streams. - // - if (uploads_ != nullptr) - { - for (const unique_ptr<istream_buckets>& is: *uploads_) - { - if (is != nullptr) - is->rewind (); - } - } - } - - istream& request:: - content (size_t limit, size_t buffer) - { - // Create the input stream/streambuf if not present, otherwise adjust the - // limits. 
- // - if (in_ == nullptr) - { - unique_ptr<istreambuf_cache> in_buf ( - new istreambuf_cache (limit, buffer, rec_, *this)); - - in_.reset (new istream (in_buf.get ())); - in_buf_ = move (in_buf); - in_->exceptions (istream::failbit | istream::badbit); - } - else - { - assert (in_buf_ != nullptr); - in_buf_->limits (limit, buffer); - } - - return *in_; - } - - const path& request:: - path () - { - if (path_.empty ()) - { - path_ = path_type (rec_->uri); // Is already URL-decoded. - - // Module request handler can not be called if URI is empty. - // - assert (!path_.empty ()); - } - - return path_; - } - - const name_values& request:: - parameters (size_t limit, bool url_only) - { - if (parameters_ == nullptr || url_only < url_only_parameters_) - { - try - { - if (parameters_ == nullptr) - { - parameters_.reset (new name_values ()); - parse_url_parameters (rec_->args); - } - - if (!url_only && form_data (limit)) - { - // After the form data is parsed we can clean it up for the - // application/x-www-form-urlencoded encoding but not for the - // multipart/form-data (see parse_multipart_parameters() for - // details). - // - if (form_multipart_) - parse_multipart_parameters (*form_data_); - else - { - // Make the character vector a NULL-terminated string. - // - form_data_->push_back ('\0'); - - parse_url_parameters (form_data_->data ()); - *form_data_ = vector<char> (); // Reset the cache. - } - } - } - catch (const invalid_argument&) - { - throw invalid_request (); - } - - url_only_parameters_ = url_only; - } - - return *parameters_; - } - - bool request:: - form_data (size_t limit) - { - if (form_data_ == nullptr) - { - form_data_.reset (new vector<char> ()); - - // We will not consider POST body as a form data if the request is in - // the reading or later state. 
- // - if (rec_->method_number == M_POST && state_ < request_state::reading) - { - const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); - - if (ct != nullptr) - { - form_multipart_ = icasecmp ("multipart/form-data", ct, 19) == 0; - - if (form_multipart_ || - icasecmp ("application/x-www-form-urlencoded", ct, 33) == 0) - *form_data_ = vector<char> ( - istreambuf_iterator<char> (content (limit)), - istreambuf_iterator<char> ()); - } - } - } - - return !form_data_->empty (); - } - - void request:: - parse_url_parameters (const char* args) - { - assert (parameters_ != nullptr); - - for (auto n (args); n != nullptr; ) - { - const char* v (strchr (n, '=')); - const char* e (strchr (n, '&')); - - if (e != nullptr && e < v) - v = nullptr; - - string name (v != nullptr - ? mime_url_decode (n, v) : - (e - ? mime_url_decode (n, e) - : mime_url_decode (n, n + strlen (n)))); - - optional<string> value; - - if (v++) - value = e - ? mime_url_decode (v, e) - : mime_url_decode (v, v + strlen (v)); - - if (!name.empty () || value) - parameters_->emplace_back (move (name), move (value)); - - n = e ? e + 1 : nullptr; - } - } - - void request:: - parse_multipart_parameters (const vector<char>& body) - { - assert (parameters_ != nullptr && uploads_ == nullptr); - - auto throw_bad_request = [] (apr_status_t s, - status_code sc = HTTP_BAD_REQUEST) - { - char buf[1024]; - throw invalid_request (sc, apr_strerror (s, buf, sizeof (buf))); - }; - - // Create the file upload stream list, filling it with NULLs for the - // parameters parsed from the URL query part. - // - uploads_.reset ( - new vector<unique_ptr<istream_buckets>> (parameters_->size ())); - - // All the required objects (parser, input/output buckets, etc.) will be - // allocated in the request memory pool and so will have the HTTP - // request duration lifetime. - // - apr_pool_t* pool (rec_->pool); - - // Create the input bucket brigade containing a single bucket that - // references the form data. 
- // - apr_bucket_alloc_t* ba (apr_bucket_alloc_create (pool)); - if (ba == nullptr) - throw_internal_error (APR_ENOMEM, "apr_bucket_alloc_create"); - - apr_bucket_brigade* bb (apr_brigade_create (pool, ba)); - if (bb == nullptr) - throw_internal_error (APR_ENOMEM, "apr_brigade_create"); - - apr_bucket* b ( - apr_bucket_immortal_create (body.data (), body.size (), ba)); - - if (b == nullptr) - throw_internal_error (APR_ENOMEM, "apr_bucket_immortal_create"); - - APR_BRIGADE_INSERT_TAIL (bb, b); - - if ((b = apr_bucket_eos_create (ba)) == nullptr) - throw_internal_error (APR_ENOMEM, "apr_bucket_eos_create"); - - APR_BRIGADE_INSERT_TAIL (bb, b); - - // Make sure that the parser will not swap the parsed data to disk - // passing the maximum possible value for the brigade limit. This way - // the resulting buckets will reference the form data directly, making - // no copies. This why we should not reset the form data cache after - // the parsing. - // - // Note that in future we may possibly setup the parser to read from the - // Apache internals directly and enable swapping the data to disk to - // minimize memory consumption. - // - apreq_parser_t* parser ( - apreq_parser_make (pool, - ba, - apr_table_get (rec_->headers_in, "Content-Type"), - apreq_parse_multipart, - APR_SIZE_MAX /* brigade_limit */, - nullptr /* temp_dir */, - nullptr /* hook */, - nullptr /* ctx */)); - - if (parser == nullptr) - throw_internal_error (APR_ENOMEM, "apreq_parser_make"); - - // Create the output table that will be filled with the parsed - // parameters. - // - apr_table_t* params (apr_table_make (pool, APREQ_DEFAULT_NELTS)); - if (params == nullptr) - throw_internal_error (APR_ENOMEM, "apr_table_make"); - - // Parse the form data. - // - apr_status_t r (apreq_parser_run (parser, params, bb)); - if (r != APR_SUCCESS) - throw_bad_request (r); - - // Fill the parameter and file upload stream lists. 
- // - const apr_array_header_t* ps (apr_table_elts (params)); - size_t n (ps->nelts); - - for (auto p (reinterpret_cast<const apr_table_entry_t*> (ps->elts)); - n--; ++p) - { - assert (p->key != nullptr && p->val != nullptr); - - if (*p->key != '\0') - { - parameters_->emplace_back (p->key, optional<string> (p->val)); - - const apreq_param_t* ap (apreq_value_to_param (p->val)); - assert (ap != nullptr); // Must always be resolvable. - - uploads_->emplace_back (ap->upload != nullptr - ? new istream_buckets (ap->upload) - : nullptr); - } - } - } - - request::uploads_type& request:: - uploads () const - { - if (parameters_ == nullptr || url_only_parameters_) - sequence_error ("web::apache::request::uploads"); - - if (uploads_ == nullptr) - throw invalid_argument ("no uploads"); - - assert (uploads_->size () == parameters_->size ()); - return *uploads_; - } - - istream& request:: - open_upload (size_t index) - { - uploads_type& us (uploads ()); - size_t n (us.size ()); - - if (index >= n) - throw invalid_argument ("invalid index"); - - const unique_ptr<istream_buckets>& is (us[index]); - - if (is == nullptr) - throw invalid_argument ("no upload"); - - return *is; - } - - istream& request:: - open_upload (const string& name) - { - uploads_type& us (uploads ()); - size_t n (us.size ()); - - istream* r (nullptr); - for (size_t i (0); i < n; ++i) - { - if ((*parameters_)[i].name == name) - { - istream* is (us[i].get ()); - - if (is != nullptr) - { - if (r != nullptr) - throw invalid_argument ("multiple uploads for '" + name + "'"); - - r = is; - } - } - } - - if (r == nullptr) - throw invalid_argument ("no upload"); - - return *r; - } - - const name_values& request:: - headers () - { - if (headers_ == nullptr) - { - headers_.reset (new name_values ()); - - const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); - size_t n (ha->nelts); - - headers_->reserve (n + 1); // One for the custom :Client-IP header. 
- - auto add = [this] (const char* n, const char* v) - { - assert (n != nullptr && v != nullptr); - headers_->emplace_back (n, optional<string> (v)); - }; - - for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts)); - n--; ++h) - add (h->key, h->val); - - assert (rec_->connection != nullptr); - - add (":Client-IP", rec_->connection->client_ip); - } - - return *headers_; - } - - const name_values& request:: - cookies () - { - if (cookies_ == nullptr) - { - cookies_.reset (new name_values ()); - - const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); - size_t n (ha->nelts); - - for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts)); - n--; ++h) - { - assert (h->key != nullptr); - - if (icasecmp (h->key, "Cookie") == 0) - { - for (const char* n (h->val); n != nullptr; ) - { - const char* v (strchr (n, '=')); - const char* e (strchr (n, ';')); - - if (e != nullptr && e < v) - v = nullptr; - - string name (v != nullptr - ? mime_url_decode (n, v, true) - : (e - ? mime_url_decode (n, e, true) - : mime_url_decode (n, n + strlen (n), true))); - - optional<string> value; - - if (v++) - value = e - ? mime_url_decode (v, e, true) - : mime_url_decode (v, v + strlen (v), true); - - if (!name.empty () || value) - cookies_->emplace_back (move (name), move (value)); - - n = e ? e + 1 : nullptr; - } - } - } - } - - return *cookies_; - } - - ostream& request:: - content (status_code status, const string& type, bool buffer) - { - if (out_ && - - // Same status code. - // - status == rec_->status && - - // Same buffering flag. - // - buffer == - (dynamic_cast<stringbuf*> (out_buf_.get ()) != nullptr) && - - // Same content type. - // - icasecmp (type, rec_->content_type ? rec_->content_type : "") == 0) - { - // No change, return the existing stream. 
- // - return *out_; - } - - if (state_ >= request_state::writing) - throw sequence_error ("web::apache::request::content"); - - if (!buffer) - // Request body will be discarded prior first byte of content is - // written. Save form data now to make it available for future - // parameters() call. - // - // In the rare cases when the form data is expectedly bigger than 64K - // the client can always call parameters(limit) explicitly. - // - form_data (64 * 1024); - - unique_ptr<streambuf> out_buf ( - buffer - ? static_cast<streambuf*> (new stringbuf ()) - : static_cast<streambuf*> (new ostreambuf (rec_, *this))); - - out_.reset (new ostream (out_buf.get ())); - out_buf_ = move (out_buf); - out_->exceptions (ostream::eofbit | ostream::failbit | ostream::badbit); - - rec_->status = status; - - ap_set_content_type ( - rec_, - type.empty () ? nullptr : apr_pstrdup (rec_->pool, type.c_str ())); - - return *out_; - } - - void request:: - status (status_code status) - { - if (status != rec_->status) - { - // Setting status code in exception handler is a common usecase - // where no sense to throw but still need to signal apache a - // proper status code. - // - if (state_ >= request_state::writing && !current_exception ()) - throw sequence_error ("web::apache::request::status"); - - rec_->status = status; - out_.reset (); - out_buf_.reset (); - ap_set_content_type (rec_, nullptr); - } - } - - void request:: - cookie (const char* name, - const char* value, - const chrono::seconds* max_age, - const char* path, - const char* domain, - bool secure, - bool buffer) - { - assert (!buffer); // Cookie buffering is not implemented yet. - - string s (mime_url_encode (name)); - s += "="; - s += mime_url_encode (value); - - if (max_age) - { - timestamp tp (system_clock::now () + *max_age); - time_t t (system_clock::to_time_t (tp)); - - // Assume global locale is not changed and still "C". 
- // - char b[100]; - strftime (b, sizeof (b), "%a, %d-%b-%Y %H:%M:%S GMT", gmtime (&t)); - s += "; Expires="; - s += b; - } - - if (path) - { - s += ";Path="; - s += path; - } - - if (domain) - { - s += ";Domain="; - s += domain; - } - - if (secure) - s += ";Secure"; - - state (request_state::headers); - apr_table_add (rec_->err_headers_out, "Set-Cookie", s.c_str ()); - } - } -} diff --git a/web/apache/request.hxx b/web/apache/request.hxx deleted file mode 100644 index 793a09d..0000000 --- a/web/apache/request.hxx +++ /dev/null @@ -1,233 +0,0 @@ -// file : web/apache/request.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_APACHE_REQUEST_HXX -#define WEB_APACHE_REQUEST_HXX - -#include <httpd.h> // request_rec, HTTP_*, OK, M_POST - -#include <chrono> -#include <memory> // unique_ptr -#include <string> -#include <vector> -#include <istream> -#include <ostream> -#include <streambuf> - -#include <web/module.hxx> -#include <web/apache/stream.hxx> - -namespace web -{ - namespace apache - { - // The state of the request processing, reflecting an interaction with - // Apache API (like reading/writing content function calls), with no - // buffering taken into account. Any state different from the initial - // suppose that some irrevocable interaction with Apache API have - // happened, so request processing should be either completed, or - // reported as failed. State values are ordered in a sense that the - // higher value reflects the more advanced stage of processing, so the - // request current state value may not decrease. - // - enum class request_state - { - // Denotes the initial stage of the request handling. At this stage - // the request line and headers are already parsed by Apache. - // - initial, - - // Reading the request content. - // - reading, - - // Adding the response headers (cookies in particular). - // - headers, - - // Writing the response content. 
- // - writing - }; - - // Extends istreambuf with read limit checking, caching, etc. (see the - // implementation for details). - // - class istreambuf_cache; - - // Stream type for reading from Apache's bucket brigades. - // - class istream_buckets; - - class request: public web::request, - public web::response, - public stream_state - { - friend class service; - - // Can not be inline/default due to the member of - // unique_ptr<istreambuf_cache> type. Note that istreambuf_cache type is - // incomplete. - // - request (request_rec* rec) noexcept; - ~request (); - - request_state - state () const noexcept {return state_;} - - // Flush the buffered response content if present. The returned value - // should be passed to Apache API on request handler exit. - // - int - flush (); - - // Prepare for the request re-processing if possible (no unbuffered - // read/write operations have been done). Throw sequence_error - // otherwise. In particular, the preparation can include the response - // content buffer cleanup, the request content buffer rewind. - // - void - rewind (); - - // Get request path. - // - virtual const path_type& - path (); - - // Get request body data stream. - // - virtual std::istream& - content (std::size_t limit = 0, std::size_t buffer = 0); - - // Get request parameters. - // - virtual const name_values& - parameters (std::size_t limit, bool url_only = false); - - // Get upload stream. - // - virtual std::istream& - open_upload (std::size_t index); - - virtual std::istream& - open_upload (const std::string& name); - - // Get request headers. - // - virtual const name_values& - headers (); - - // Get request cookies. - // - virtual const name_values& - cookies (); - - // Get response status code. - // - status_code - status () const noexcept {return rec_->status;} - - // Set response status code. - // - virtual void - status (status_code status); - - // Set response status code, content type and get body stream. 
- // - virtual std::ostream& - content (status_code status, - const std::string& type, - bool buffer = true); - - // Add response cookie. - // - virtual void - cookie (const char* name, - const char* value, - const std::chrono::seconds* max_age = nullptr, - const char* path = nullptr, - const char* domain = nullptr, - bool secure = false, - bool buffer = true); - - private: - // On the first call cache the application/x-www-form-urlencoded or - // multipart/form-data form data for the subsequent parameters parsing - // and set the multipart flag accordingly. Don't cache if the request is - // in the reading or later state. Return true if the cache contains the - // form data. - // - // Note that the function doesn't change the content buffering (see - // content() function for details) nor rewind the content stream after - // reading. - // - bool - form_data (std::size_t limit); - - // Used to also parse application/x-www-form-urlencoded POST body. - // - void - parse_url_parameters (const char* args); - - void - parse_multipart_parameters (const std::vector<char>& body); - - // Return a list of the upload input streams. Throw sequence_error if - // the parameters() function was not called yet. Throw invalid_argument - // if the request doesn't contain multipart form data. - // - using uploads_type = std::vector<std::unique_ptr<istream_buckets>>; - - uploads_type& - uploads () const; - - // Advance the request processing state. Noop if new state is equal to - // the current one. Throw sequence_error if the new state is less then - // the current one. Can throw invalid_request if HTTP request is - // malformed. - // - void - state (request_state); - - // stream_state members implementation. 
- // - virtual void - set_read_state () {state (request_state::reading);} - - virtual void - set_write_state () {state (request_state::writing);} - - private: - request_rec* rec_; - request_state state_ = request_state::initial; - - path_type path_; - - std::unique_ptr<name_values> parameters_; - bool url_only_parameters_; // Meaningless if parameters_ is NULL; - - // Uploaded file streams. If not NULL, is parallel to the parameters - // list. - // - std::unique_ptr<uploads_type> uploads_; - - std::unique_ptr<name_values> headers_; - std::unique_ptr<name_values> cookies_; - - // Form data cache. Is empty if the body doesn't contain the form data. - // - std::unique_ptr<std::vector<char>> form_data_; - bool form_multipart_; // Meaningless if form_data_ is NULL or empty; - - std::unique_ptr<istreambuf_cache> in_buf_; - std::unique_ptr<std::istream> in_; - - std::unique_ptr<std::streambuf> out_buf_; - std::unique_ptr<std::ostream> out_; - }; - } -} - -#include <web/apache/request.ixx> - -#endif // WEB_APACHE_REQUEST_HXX diff --git a/web/apache/request.ixx b/web/apache/request.ixx deleted file mode 100644 index 3a1c01a..0000000 --- a/web/apache/request.ixx +++ /dev/null @@ -1,45 +0,0 @@ -// file : web/apache/request.ixx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <http_protocol.h> // ap_*() - -#include <sstream> // stringbuf - -namespace web -{ - namespace apache - { - inline int request:: - flush () - { - if (std::stringbuf* b = dynamic_cast<std::stringbuf*> (out_buf_.get ())) - { - // Response content is buffered. - // - std::string s (b->str ()); - - if (!s.empty ()) - { - try - { - state (request_state::writing); - - if (ap_rwrite (s.c_str (), s.length (), rec_) < 0) - rec_->status = HTTP_REQUEST_TIME_OUT; - } - catch (const invalid_request& e) - { - rec_->status = e.status; - } - } - - out_.reset (); - out_buf_.reset (); - } - - return rec_->status == HTTP_OK || state_ >= request_state::writing - ? 
OK - : rec_->status; - } - } -} diff --git a/web/apache/service.cxx b/web/apache/service.cxx deleted file mode 100644 index 1eeb65e..0000000 --- a/web/apache/service.cxx +++ /dev/null @@ -1,268 +0,0 @@ -// file : web/apache/service.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <web/apache/service.hxx> - -#include <apr_pools.h> // apr_palloc() - -#include <httpd.h> // server_rec -#include <http_config.h> // command_rec, cmd_*, ap_get_module_config() - -#include <memory> // unique_ptr -#include <string> -#include <cassert> -#include <utility> // move() -#include <cstring> // strlen(), strcmp() -#include <exception> - -#include <libbutl/utility.mxx> // function_cast() -#include <libbutl/optional.mxx> - -#include <web/module.hxx> -#include <web/apache/log.hxx> - -using namespace std; -using namespace butl; - -namespace web -{ - namespace apache - { - void service:: - init_directives () - { - assert (cmds == nullptr); - - // Fill apache module directive definitions. Directives share common - // name space in apache configuration file, so to prevent name clash - // have to form directive name as a combination of module and option - // names: <module name>-<option name>. This why for option bar of module - // foo the corresponding directive will appear in apache configuration - // file as foo-bar. - // - const option_descriptions& od (exemplar_.options ()); - unique_ptr<command_rec[]> directives (new command_rec[od.size () + 2]); - command_rec* d (directives.get ()); - - for (const auto& o: od) - { - auto i ( - option_descriptions_.emplace (name_ + "-" + o.first, o.second)); - assert (i.second); - - *d++ = - { - i.first->first.c_str (), - function_cast<cmd_func> (parse_option), - this, - - // Allow directives in both server and directory configuration - // scopes. - // - RSRC_CONF | ACCESS_CONF, - - // Move away from TAKE1 to be able to handle empty string and - // no-value. 
- // - RAW_ARGS, - - nullptr - }; - } - - // Track if the handler is allowed to handle a request in the specific - // configuration scope. The handler exemplar will be created (and - // initialized) only for configuration contexts that have - // 'SetHandler <mod_name>' in effect for the corresponding scope. - // - *d++ = - { - "SetHandler", - function_cast<cmd_func> (parse_option), - this, - RSRC_CONF | ACCESS_CONF, - RAW_ARGS, - nullptr - }; - - *d = {nullptr, nullptr, nullptr, 0, RAW_ARGS, nullptr}; - cmds = directives.release (); - } - - void* service:: - create_server_context (apr_pool_t* pool, server_rec*) noexcept - { - // Create the object using the configuration memory pool provided by the - // Apache API. The lifetime of the object is equal to the lifetime of - // the pool. - // - void* p (apr_palloc (pool, sizeof (context))); - assert (p != nullptr); - return new (p) context (); - } - - void* service:: - create_dir_context (apr_pool_t* pool, char* dir) noexcept - { - // Create the object using the configuration memory pool provided by the - // Apache API. The lifetime of the object is equal to the lifetime of - // the pool. - // - void* p (apr_palloc (pool, sizeof (context))); - assert (p != nullptr); - - // For the user-defined directory configuration context dir is the path - // of the corresponding directive. For the special server directory - // invented by Apache for server scope directives, dir is NULL. - // - return new (p) context (dir == nullptr); - } - - const char* service:: - parse_option (cmd_parms* parms, void* conf, const char* args) noexcept - { - service& srv (*reinterpret_cast<service*> (parms->cmd->cmd_data)); - - if (srv.options_parsed_) - // Apache have started the second pass of its messy initialization - // cycle (more details at http://wiki.apache.org/httpd/ModuleLife). - // This time we are parsing for real. Cleanup the existing config, and - // start building the new one. 
- // - srv.clear_config (); - - // 'args' is an optionally double-quoted string. It uses double quotes - // to distinguish empty string from no-value case. - // - assert (args != nullptr); - - optional<string> value; - if (auto l = strlen (args)) - value = l >= 2 && args[0] == '"' && args[l - 1] == '"' - ? string (args + 1, l - 2) - : args; - - // Determine the directory and server configuration contexts for the - // option. - // - context* dir_context (context_cast (conf)); - assert (dir_context != nullptr); - - server_rec* server (parms->server); - assert (server != nullptr); - assert (server->module_config != nullptr); - - context* srv_context ( - context_cast (ap_get_module_config (server->module_config, &srv))); - - assert (srv_context != nullptr); - - // Associate the directory configuration context with the enclosing - // server configuration context. - // - context*& s (dir_context->server); - if (s == nullptr) - s = srv_context; - else - assert (s == srv_context); - - // If the option appears in the special directory configuration context, - // add it to the enclosing server context instead. This way it will be - // possible to complement all server-enclosed contexts (including this - // special one) with the server scope options. - // - context* c (dir_context->special ? srv_context : dir_context); - - if (dir_context->special) - // - // Make sure the special directory context is also in the option lists - // map. Later the context will be populated with an enclosing server - // context options. - // - srv.options_.emplace (dir_context, name_values ()); - - const char* name (parms->cmd->name); - if (strcmp (name, "SetHandler") == 0) - { - // Keep track of a request handling allowability. - // - srv.options_.emplace (c, name_values ()).first->first->handling = - value && *value == srv.name_ - ? 
request_handling::allowed - : request_handling::disallowed; - - return 0; - } - - return srv.add_option (c, name, move (value)); - } - - const char* service:: - add_option (context* ctx, const char* name, optional<string> value) - { - auto i (option_descriptions_.find (name)); - assert (i != option_descriptions_.end ()); - - // Check that option value presense is expected. - // - if (i->second != static_cast<bool> (value)) - return value ? "unexpected value" : "value expected"; - - options_[ctx].emplace_back (name + name_.length () + 1, move (value)); - return 0; - } - - void service:: - complement (context* enclosed, context* enclosing) - { - auto i (options_.find (enclosing)); - - // The enclosing context may have no options. It can be the context of a - // server that has no configuration directives in it's immediate scope, - // but has ones in it's enclosed scope (directory or virtual server). - // - if (i != options_.end ()) - { - const name_values& src (i->second); - name_values& dest (options_[enclosed]); - dest.insert (dest.begin (), src.begin (), src.end ()); - } - - if (enclosed->handling == request_handling::inherit) - enclosed->handling = enclosing->handling; - } - - void service:: - finalize_config (server_rec* s) - { - if (!version_logged_) - { - log l (s, this); - exemplar_.version (l); - version_logged_ = true; - } - - // Complement directory configuration contexts with options of the - // enclosing server configuration context. By this time virtual server - // contexts are already complemented with the main server configuration - // context options as a result of the merge_server_context() calls. - // - for (const auto& o: options_) - { - // Is a directory configuration context. 
- // - if (o.first->server != nullptr) - complement (o.first, o.first->server); - } - - options_parsed_ = true; - } - - void service:: - clear_config () - { - options_.clear (); - options_parsed_ = false; - } - } -} diff --git a/web/apache/service.hxx b/web/apache/service.hxx deleted file mode 100644 index aaf006e..0000000 --- a/web/apache/service.hxx +++ /dev/null @@ -1,333 +0,0 @@ -// file : web/apache/service.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_APACHE_SERVICE_HXX -#define WEB_APACHE_SERVICE_HXX - -#include <apr_pools.h> // apr_pool_t -#include <apr_hooks.h> // APR_HOOK_* - -#include <httpd.h> // request_rec, server_rec, HTTP_*, DECLINED -#include <http_config.h> // module, cmd_parms, ap_hook_*() - -#include <map> -#include <memory> // unique_ptr -#include <string> -#include <cassert> - -#include <web/module.hxx> -#include <web/apache/log.hxx> -#include <web/apache/request.hxx> - -namespace web -{ - namespace apache - { - // Apache has 3 configuration scopes: main server, virtual server, and - // directory (location). It provides configuration scope-aware modules - // with the ability to build a hierarchy of configuration contexts. Later, - // when processing a request, Apache passes the appropriate directory - // configuration context to the request handler. - // - // This Apache service implementation first makes a copy of the provided - // (in the constructor below) handler exemplar for each directory context. - // It then initializes each of these "context exemplars" with the (merged) - // set of configuration options. Finally, when handling a request, it - // copies the corresponding "context exemplar" to create the "handling - // instance". Note that the "context exemplars" are created as a copy of - // the provided exemplar, which is never initialized. As a result, it is - // possible to detect if the handler's copy constructor is used to create - // a "context exemplar" or a "handling instance". 
- // - class service: ::module - { - public: - // Note that the module exemplar is stored by-reference. - // - template <typename H> - service (const std::string& name, H& exemplar) - : ::module - { - STANDARD20_MODULE_STUFF, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - ®ister_hooks<H> - -#ifdef AP_MODULE_HAS_FLAGS - , AP_MODULE_FLAG_NONE -#endif - }, - name_ (name), - exemplar_ (exemplar) - { - init_directives (); - - // Set configuration context management hooks. - // - // The overall process of building the configuration hierarchy for a - // handler is as follows: - // - // 1. Apache creates directory and server configuration contexts for - // scopes containing handler-defined directives by calling the - // create_{server,dir}_context() callback functions. For directives - // at the server scope the special directory context is created as - // well. - // - // 2. Apache calls parse_option() function for each handler-defined - // directive. The function parses the directives and places the - // resulting options into the corresponding configuration context. - // It also establishes the directory-server contexts relations. - // - // 3. Apache calls merge_server_context() function for each virtual - // server. The function complements virtual server context options - // with the ones from the main server. - // - // 4. Apache calls config_finalizer() which complements the directory - // contexts options with the ones from the enclosing servers. - // - // 5. Apache calls worker_initializer() which creates handler exemplar - // for each directory configuration context that have - // 'SetHandler <mod_name>' directive in effect for it. 
- // - // References: - // http://www.apachetutor.org/dev/config - // http://httpd.apache.org/docs/2.4/developer/modguide.html - // http://wiki.apache.org/httpd/ModuleLife - // - create_server_config = &create_server_context; - create_dir_config = &create_dir_context; - merge_server_config = &merge_server_context<H>; - - // instance<H> () is invented to delegate processing from apache - // request handler C function to the service non static member - // function. This appoach resticts number of service objects per - // specific handler implementation class with just one instance. - // - service*& srv (instance<H> ()); - assert (srv == nullptr); - srv = this; - } - - ~service () - { - delete [] cmds; - } - - private: - template <typename H> - static service*& - instance () noexcept - { - static service* instance; - return instance; - } - - template <typename H> - static void - register_hooks (apr_pool_t*) noexcept - { - // The config_finalizer() function is called at the end of Apache - // server configuration parsing. - // - ap_hook_post_config (&config_finalizer<H>, NULL, NULL, APR_HOOK_LAST); - - // The worker_initializer() function is called right after Apache - // worker process is started. Called for every new process spawned. - // - ap_hook_child_init ( - &worker_initializer<H>, NULL, NULL, APR_HOOK_LAST); - - // The request_handler () function is called for each client request. 
- // - ap_hook_handler (&request_handler<H>, NULL, NULL, APR_HOOK_LAST); - } - - template <typename H> - static int - config_finalizer (apr_pool_t*, apr_pool_t*, apr_pool_t*, server_rec* s) - noexcept - { - instance<H> ()->finalize_config (s); - return OK; - } - - template <typename H> - static void - worker_initializer (apr_pool_t*, server_rec* s) noexcept - { - auto srv (instance<H> ()); - log l (s, srv); - srv->template init_worker<H> (l); - } - - template <typename H> - static int - request_handler (request_rec* r) noexcept; - - private: - - // Reflects the allowability of the request handling in the specific - // configuration scope. - // - enum class request_handling - { - // Configuration scope has 'SetHandler <mod_name>' directive - // specified. The handler is allowed to handle a request in the scope. - // - allowed, - - // Configuration scope has 'SetHandler <other_mod_name>|None' - // directive specified. The handler is disallowed to handle a request - // in the scope. - // - disallowed, - - // - // Note that if there are several SetHandler directives specified - // in the specific scope, then the latest one takes the precedence. - - // Configuration scope has no SetHandler directive specified. The - // request handling allowability is established by the enclosing - // scopes. - // - inherit - }; - - // Our representation of the Apache configuration context. - // - // The lifetime of this object is under the control of the Apache API, - // which treats it as a raw sequence of bytes. In order not to tinker - // with the C-style structures and APR memory pools, we will keep it a - // (C++11) POD type with just the members required to maintain the - // context hierarchy. - // - // We will then use the pointers to these context objects as keys in - // maps to (1) the corresponding application-level option lists during - // the configuration cycle and to (2) the corresponding handler exemplar - // during the HTTP request handling phase. 
We will also use the same - // type for both directory and server configuration contexts. - // - struct context - { - // Outer (server) configuration context for the directory - // configuration context, NULL otherwise. - // - context* server = nullptr; - - // If module directives appear directly in the server configuration - // scope, Apache creates a special directory context for them. This - // context appears at the same hierarchy level as the user-defined - // directory contexts of the same server scope. - // - bool special; - - // Request handling allowability for the corresponding configuration - // scope. - // - request_handling handling = request_handling::inherit; - - // Create the server configuration context. - // - context (): special (false) {} - - // Create the directory configuration context. Due to the Apache API - // implementation details it is not possible to detect the enclosing - // server configuration context at the time of directory context - // creation. As a result, the server member is set by the module's - // parse_option() function. - // - context (bool s): special (s) {} - - // Ensure the object is only destroyed by Apache. - // - ~context () = delete; - }; - - static context* - context_cast (void* config) noexcept - {return static_cast<context*> (config);} - - private: - void - init_directives (); - - // Create the server configuration context. Called by the Apache API - // whenever a new object of that type is required. - // - static void* - create_server_context (apr_pool_t*, server_rec*) noexcept; - - // Create the server directory configuration context. Called by the - // Apache API whenever a new object of that type is required. 
- // - static void* - create_dir_context (apr_pool_t*, char* dir) noexcept; - - template <typename H> - static void* - merge_server_context (apr_pool_t*, void* enclosing, void* enclosed) - noexcept - { - instance<H> ()->complement ( - context_cast (enclosed), context_cast (enclosing)); - - return enclosed; - } - - static const char* - parse_option (cmd_parms* parms, void* conf, const char* args) noexcept; - - const char* - add_option (context*, const char* name, optional<std::string> value); - - void - finalize_config (server_rec*); - - void - clear_config (); - - // Complement the enclosed context with options of the enclosing one. - // If the 'handling' member of the enclosed context is set to - // request_handling::inherit value, assign it a value from the enclosing - // context. - // - void - complement (context* enclosed, context* enclosing); - - template <typename H> - void - init_worker (log&); - - template <typename H> - int - handle (request&, const context*, log&) const; - - private: - std::string name_; - handler& exemplar_; - option_descriptions option_descriptions_; - - // The context objects pointed to by the key can change during the - // configuration phase. - // - using options = std::map<context*, name_values>; - options options_; - - // The context objects pointed to by the key can not change during the - // request handling phase. 
- // - using exemplars = std::map<const context*, std::unique_ptr<handler>>; - exemplars exemplars_; - - bool options_parsed_ = false; - bool version_logged_ = false; - }; - } -} - -#include <web/apache/service.txx> - -#endif // WEB_APACHE_SERVICE_HXX diff --git a/web/apache/service.txx b/web/apache/service.txx deleted file mode 100644 index bda8e10..0000000 --- a/web/apache/service.txx +++ /dev/null @@ -1,213 +0,0 @@ -// file : web/apache/service.txx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <httpd.h> // APEXIT_CHILDSICK -#include <http_log.h> // APLOG_* - -#include <cstdlib> // exit() -#include <utility> // move() -#include <exception> - -#include <libbutl/utility.mxx> // operator<<(ostream, exception) - -namespace web -{ - namespace apache - { - template <typename H> - void service:: - init_worker (log& l) - { - using namespace std; - - const string func_name ( - "web::apache::service<" + name_ + ">::init_worker"); - - try - { - const H* exemplar (dynamic_cast<const H*> (&exemplar_)); - assert (exemplar != nullptr); - - // For each directory configuration context, for which the handler is - // allowed to handle a request, create the handler exemplar as a deep - // copy of the exemplar_ member, and initialize it with the - // context-specific option list. - // - for (const auto& o: options_) - { - const context* c (o.first); - - if (c->server != nullptr && // Is a directory configuration context. - c->handling == request_handling::allowed) - { - auto r ( - exemplars_.emplace ( - c, - unique_ptr<handler> (new H (*exemplar)))); - - r.first->second->init (o.second, l); - } - } - - // Options are not needed anymore. Free up the space. - // - options_.clear (); - } - catch (const exception& e) - { - l.write (nullptr, 0, func_name.c_str (), APLOG_EMERG, e.what ()); - - // Terminate the worker apache process. APEXIT_CHILDSICK indicates to - // the root process that the worker have exited due to a resource - // shortage. 
In this case the root process limits the rate of forking - // until situation is resolved. - // - // If the root process fails to create any worker process on startup, - // the behaviour depends on the Multi-Processing Module enabled. For - // mpm_worker_module and mpm_event_module the root process terminates. - // For mpm_prefork_module it keeps trying to create the worker process - // at one-second intervals. - // - // If the root process loses all it's workers while running (for - // example due to the MaxRequestsPerChild directive), and fails to - // create any new ones, it keeps trying to create the worker process - // at one-second intervals. - // - exit (APEXIT_CHILDSICK); - } - catch (...) - { - l.write (nullptr, - 0, - func_name.c_str (), - APLOG_EMERG, - "unknown error"); - - // Terminate the worker apache process. - // - exit (APEXIT_CHILDSICK); - } - } - - template <typename H> - int service:: - request_handler (request_rec* r) noexcept - { - auto srv (instance<H> ()); - if (!r->handler || srv->name_ != r->handler) return DECLINED; - - assert (r->per_dir_config != nullptr); - - // Obtain the request-associated configuration context. 
- // - const context* cx ( - context_cast (ap_get_module_config (r->per_dir_config, srv))); - - assert (cx != nullptr); - - request rq (r); - log lg (r->server, srv); - return srv->template handle<H> (rq, cx, lg); - } - - template <typename H> - int service:: - handle (request& rq, const context* cx, log& lg) const - { - using namespace std; - - static const string func_name ( - "web::apache::service<" + name_ + ">::handle"); - - try - { - auto i (exemplars_.find (cx)); - assert (i != exemplars_.end ()); - - const H* e (dynamic_cast<const H*> (i->second.get ())); - assert (e != nullptr); - - for (H h (*e);;) - { - try - { - if (static_cast<handler&> (h).handle (rq, rq, lg)) - return rq.flush (); - - if (rq.state () == request_state::initial) - return DECLINED; - - lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, - "handling declined being partially executed"); - break; - } - catch (const handler::retry&) - { - // Retry to handle the request. - // - rq.rewind (); - } - } - } - catch (const invalid_request& e) - { - if (!e.content.empty () && rq.state () < request_state::writing) - { - try - { - rq.content (e.status, e.type) << e.content << endl; - return rq.flush (); - } - catch (const exception& e) - { - lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); - } - } - - return e.status; - } - catch (const exception& e) - { - lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); - - if (*e.what () && rq.state () < request_state::writing) - { - try - { - rq.content ( - HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") - << e << endl; - - return rq.flush (); - } - catch (const exception& e) - { - lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); - } - } - } - catch (...) 
- { - lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, "unknown error"); - - if (rq.state () < request_state::writing) - { - try - { - rq.content ( - HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") - << "unknown error" << endl; - - return rq.flush (); - } - catch (const exception& e) - { - lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); - } - } - } - - return HTTP_INTERNAL_SERVER_ERROR; - } - } -} diff --git a/web/apache/stream.hxx b/web/apache/stream.hxx deleted file mode 100644 index ed0018e..0000000 --- a/web/apache/stream.hxx +++ /dev/null @@ -1,148 +0,0 @@ -// file : web/apache/stream.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_APACHE_STREAM_HXX -#define WEB_APACHE_STREAM_HXX - -#include <httpd.h> // request_rec, HTTP_* -#include <http_protocol.h> // ap_*() - -#include <ios> // streamsize -#include <vector> -#include <cstring> // memmove(), size_t -#include <streambuf> -#include <algorithm> // min(), max() - -#include <web/module.hxx> // invalid_request - -namespace web -{ - namespace apache - { - // Object of a class implementing this interface is intended for keeping - // the state of communication with the client. - // - struct stream_state - { - // Called by istreambuf functions when content is about to be read from - // the client. Can throw invalid_request or sequence_error. - // - virtual void - set_read_state () = 0; - - // Called by ostreambuf functions when some content is about to be - // written to the client. Can throw invalid_request or sequence_error. - // - virtual void - set_write_state () = 0; - }; - - // Base class for ostreambuf and istreambuf. References request and - // communication state structures. 
- // - class rbuf: public std::streambuf - { - protected: - rbuf (request_rec* r, stream_state& s): rec_ (r), state_ (s) {} - - protected: - request_rec* rec_; - stream_state& state_; - }; - - class ostreambuf: public rbuf - { - public: - ostreambuf (request_rec* r, stream_state& s): rbuf (r, s) {} - - private: - virtual int_type - overflow (int_type c) - { - if (c != traits_type::eof ()) - { - state_.set_write_state (); - - char chr (c); - - // Throwing allows to distinguish comm failure from other IO error - // conditions. - // - if (ap_rwrite (&chr, sizeof (chr), rec_) == -1) - throw invalid_request (HTTP_REQUEST_TIME_OUT); - } - - return c; - } - - virtual std::streamsize - xsputn (const char* s, std::streamsize num) - { - state_.set_write_state (); - - if (ap_rwrite (s, num, rec_) < 0) - throw invalid_request (HTTP_REQUEST_TIME_OUT); - - return num; - } - - virtual int - sync () - { - if (ap_rflush (rec_) < 0) - throw invalid_request (HTTP_REQUEST_TIME_OUT); - - return 0; - } - }; - - class istreambuf: public rbuf - { - public: - istreambuf (request_rec* r, - stream_state& s, - size_t bufsize = 1024, - size_t putback = 1) - : rbuf (r, s), - bufsize_ (std::max (bufsize, (size_t)1)), - putback_ (std::min (putback, bufsize_ - 1)), - buf_ (bufsize_) - { - char* p (buf_.data () + putback_); - setg (p, p, p); - } - - protected: - virtual int_type - underflow () - { - if (gptr () < egptr ()) - return traits_type::to_int_type (*gptr ()); - - state_.set_read_state (); - - size_t pb (std::min ((size_t)(gptr () - eback ()), putback_)); - std::memmove (buf_.data () + putback_ - pb, gptr () - pb, pb); - - char* p (buf_.data () + putback_); - int rb (ap_get_client_block (rec_, p, bufsize_ - putback_)); - - if (rb == 0) - return traits_type::eof (); - - if (rb < 0) - throw invalid_request (HTTP_REQUEST_TIME_OUT); - - setg (p - pb, p, p + rb); - return traits_type::to_int_type (*gptr ()); - } - - protected: - size_t bufsize_; - size_t putback_; - std::vector<char> buf_; - }; 
- } -} - -#endif // WEB_APACHE_STREAM_HXX diff --git a/web/buildfile b/web/buildfile deleted file mode 100644 index 0ca6cf6..0000000 --- a/web/buildfile +++ /dev/null @@ -1,18 +0,0 @@ -# file : web/buildfile -# license : MIT; see accompanying LICENSE file - -# This is currently part of the brep apache module but lives in a separate -# directory. Thus the strange choices: libus{}, no header installation, etc. - -# While we don't need to link to APR, we need to find its header location. -# -import libs = libapr1%lib{apr-1} -import libs += libapreq2%lib{apreq2} -import libs += libstudxml%lib{studxml} -import libs += libbutl%lib{butl} - -libus{web}: {hxx ixx txx cxx}{** -version} {hxx}{version} $libs - -hxx{version}: in{version} $src_root/manifest - -{hxx ixx txx}{*}: install = false diff --git a/web/mime-url-encoding.cxx b/web/mime-url-encoding.cxx deleted file mode 100644 index e202f08..0000000 --- a/web/mime-url-encoding.cxx +++ /dev/null @@ -1,66 +0,0 @@ -// file : web/mime-url-encoding.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <web/mime-url-encoding.hxx> - -#include <string> -#include <iterator> // back_inserter - -#include <libbutl/url.mxx> - -using namespace std; -using namespace butl; - -namespace web -{ - inline static bool - encode_query (char& c) - { - if (c == ' ') - { - c = '+'; - return false; - } - - return !url::unreserved (c); - } - - string - mime_url_encode (const char* v, bool query) - { - return query ? url::encode (v, encode_query) : url::encode (v); - } - - string - mime_url_encode (const string& v, bool query) - { - return query ? 
url::encode (v, encode_query) : url::encode (v); - } - - string - mime_url_decode (const char* b, const char* e, bool trim, bool query) - { - if (trim) - { - for (; b != e && *b == ' '; ++b) ; - - if (b == e) - return string (); - - while (*--e == ' '); - ++e; - } - - string r; - if (!query) - url::decode (b, e, back_inserter (r)); - else - url::decode (b, e, back_inserter (r), - [] (char& c) - { - if (c == '+') - c = ' '; - }); - return r; - } -} diff --git a/web/mime-url-encoding.hxx b/web/mime-url-encoding.hxx deleted file mode 100644 index b9d2a76..0000000 --- a/web/mime-url-encoding.hxx +++ /dev/null @@ -1,32 +0,0 @@ -// file : web/mime-url-encoding.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_MIME_URL_ENCODING_HXX -#define WEB_MIME_URL_ENCODING_HXX - -#include <string> - -namespace web -{ - // URL-encode characters other than unreserved (see RFC3986). If the query - // flag is true, then the encoding is applied to the URL query part, and so - // convert space characters to plus characters rather than percent-encode - // them. - // - std::string - mime_url_encode (const char*, bool query = true); - - std::string - mime_url_encode (const std::string&, bool query = true); - - // If the query flag is true, then convert plus characters to space - // characters (see above). Throw std::invalid_argument if an invalid encoding - // sequence is encountered. 
- // - std::string - mime_url_decode (const char* b, const char* e, - bool trim = false, - bool query = true); -} - -#endif // WEB_MIME_URL_ENCODING_HXX diff --git a/web/module.hxx b/web/module.hxx deleted file mode 100644 index 5e9959e..0000000 --- a/web/module.hxx +++ /dev/null @@ -1,299 +0,0 @@ -// file : web/module.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_MODULE_HXX -#define WEB_MODULE_HXX - -#include <map> -#include <string> -#include <vector> -#include <iosfwd> -#include <chrono> -#include <cstdint> // uint16_t -#include <cstddef> // size_t -#include <utility> // move() -#include <stdexcept> // runtime_error - -#include <libbutl/path.mxx> -#include <libbutl/optional.mxx> - -namespace web -{ - using butl::optional; - - // HTTP status code. - // - // @@ Define some commonly used constants? - // - using status_code = std::uint16_t; - - // This exception is used to signal that the request is invalid - // (4XX codes) rather than that it could not be processed (5XX). - // By default 400 is returned, which means the request is malformed. - // - // If caught by the web server implementation, it will try to return - // the specified status and content to the client, if possible. - // It is, however, may not be possible if some unbuffered content has - // already been written. The behavior in this case is implementation- - // specific and may result in no indication of an error being sent to - // the client. - // - struct invalid_request - { - status_code status; - std::string content; - std::string type; - - //@@ Maybe optional "try again" link? - // - invalid_request (status_code s = 400, - std::string c = "", - std::string t = "text/plain;charset=utf-8") - : status (s), content (std::move (c)), type (std::move (t)) {} - }; - - // Exception indicating HTTP request/response sequencing error. - // For example, trying to change the status code after some - // content has already been written. 
- // - struct sequence_error: std::runtime_error - { - sequence_error (std::string d): std::runtime_error (std::move (d)) {} - }; - - // Map of handler configuration option names to the boolean flag indicating - // whether the value is expected for the option. - // - using option_descriptions = std::map<std::string, bool>; - - struct name_value - { - // These should eventually become string_view's. - // - std::string name; - optional<std::string> value; - - name_value () {} - name_value (std::string n, optional<std::string> v) - : name (std::move (n)), value (std::move (v)) {} - }; - - using name_values = std::vector<name_value>; - using butl::path; - - class request - { - public: - using path_type = web::path; - - virtual - ~request () = default; - - // Corresponds to abs_path portion of HTTP URL as described in "3.2.2 HTTP - // URL" of http://tools.ietf.org/html/rfc2616. Returns '/' if no abs_path - // is present in URL. - // - virtual const path_type& - path () = 0; - - //@@ Why not pass parameters directly? Lazy parsing? - //@@ Why not have something like operator[] for lookup? Probably - // in name_values. - //@@ Maybe parameter_list() and parameter_map()? - // - // Parse parameters from the URL query part and from the HTTP POST request - // body for the application/x-www-form-urlencoded or multipart/form-data - // content type. Optionally limit the amount of data read from the body - // (see the content() function for the semantics). Throw invalid_request - // if parameters decoding fails. - // - virtual const name_values& - parameters (std::size_t limit, bool url_only = false) = 0; - - // Open the input stream for the upload corresponding to the specified - // parameter index. Must be called after the parameters() function is - // called, throw sequence_error if that's not the case. Throw - // invalid_argument if the index doesn't have an upload (for example, - // because the parameter is not <input type="file"/> form field). 
- // - // Note also that reopening the same upload (within the same retry) - // returns the same stream reference. - // - virtual std::istream& - open_upload (std::size_t index) = 0; - - // As above but specify the parameter by name. Throw invalid_argument if - // there are multiple uploads for this parameter name. - // - virtual std::istream& - open_upload (const std::string& name) = 0; - - // Request headers. - // - // The implementation may add custom pseudo-headers reflecting additional - // request options. Such headers should start with ':'. If possible, the - // implementation should add the following well-known pseudo-headers: - // - // :Client-IP - IP address of the connecting client. - // - virtual const name_values& - headers () = 0; - - // Throw invalid_request if cookies are malformed. - // - virtual const name_values& - cookies () = 0; - - // Get the stream to read the request content from. If the limit argument - // is zero, then the content limit is left unchanged (unlimited initially). - // Otherwise the requested limit is set, and the invalid_request exception - // with the code 413 (payload too large) will be thrown when the specified - // limit is reached while reading from the stream. If the buffer argument - // is zero, then the buffer size is left unchanged (zero initially). If it - // is impossible to increase the buffer size (because, for example, some - // content is already read unbuffered), then the sequence_error is thrown. - // - // Note that unread input content is discarded when any unbuffered content - // is written, and any attempt to read it will result in the - // sequence_error exception being thrown. - // - virtual std::istream& - content (std::size_t limit, std::size_t buffer = 0) = 0; - }; - - class response - { - public: - virtual - ~response () = default; - - // Set status code, content type, and get the stream to write - // the content to. 
If the buffer argument is true (default), - // then buffer the entire content before sending it as a - // response. This allows us to change the status code in - // case of an error. - // - // Specifically, if there is already content in the buffer - // and the status code is changed, then the old content is - // discarded. If the content was not buffered and the status - // is changed, then the sequence_error exception is thrown. - // If this exception leaves handler::handle(), then the - // implementation shall terminate the response in a suitable - // but unspecified manner. In particular, there is no guarantee - // that the user will be notified of an error or observe the - // new status. - // - virtual std::ostream& - content (status_code code = 200, - const std::string& type = "application/xhtml+xml;charset=utf-8", - bool buffer = true) = 0; - - // Set status code without writing any content. On status change, - // discard buffered content or throw sequence_error if content was - // not buffered. - // - virtual void - status (status_code) = 0; - - // Throw sequence_error if some unbuffered content has already - // been written. - // - virtual void - cookie (const char* name, - const char* value, - const std::chrono::seconds* max_age = nullptr, - const char* path = nullptr, - const char* domain = nullptr, - bool secure = false, - bool buffer = true) = 0; - }; - - // A web server logging backend. The handler can use it to log - // diagnostics that is meant for the web server operator rather - // than the user. - // - // The handler can cast this basic interface to the web server's - // specific implementation that may provide a richer interface. - // - class log - { - public: - virtual - ~log () = default; - - virtual void - write (const char* msg) = 0; - }; - - // The web server creates a new handler instance for each request - // by copy-initializing it with the handler exemplar. 
This way we - // achieve two things: we can freely use handler data members - // without worrying about multi-threading issues and we - // automatically get started with the initial state for each - // request. If you really need to share some rw-data between - // all the handlers, use static data members with appropriate - // locking. See the <service> header in one of the web server - // directories (e.g., apache/) if you need to see the code that - // does this. - // - class handler - { - public: - virtual - ~handler () = default; - - // Description of configuration options supported by this handler. Note: - // should be callable during static initialization. - // - virtual option_descriptions - options () = 0; - - // During startup the web server calls this function on the handler - // exemplar to log the handler version information. It is up to the web - // server whether to call this function once per handler implementation - // type. Therefore, it is expected that this function will log the same - // information for all the handler exemplars. - // - virtual void - version (log&) = 0; - - // During startup the web server calls this function on the handler - // exemplar passing a list of configuration options. The place these - // configuration options come from is implementation-specific (normally - // a configuration file). The web server guarantees that only options - // listed in the map returned by the options() function above can be - // present. Any exception thrown by this function terminates the web - // server. - // - virtual void - init (const name_values&, log&) = 0; - - // Return false if decline to handle the request. If handling have been - // declined after any unbuffered content has been written, then the - // implementation shall terminate the response in a suitable but - // unspecified manner. - // - // Throw retry if need to retry handling the request. 
The retry will - // happen on the same instance of the handler and the implementation is - // expected to "rewind" the request and response objects to their initial - // state. This is only guaranteed to be possible if the relevant functions - // in the request and response objects were called in buffered mode (the - // buffer argument was true). - // - // Any exception other than retry and invalid_request described above that - // leaves this function is treated by the web server implementation as an - // internal server error (500). Similar to invalid_request, it will try to - // return the status and description (obtained by calling what() on - // std::exception) to the client, if possible. The description is assume - // to be encoded in UTF-8. The implementation may provide a configuration - // option to omit the description from the response, for security/privacy - // reasons. - // - struct retry {}; - - virtual bool - handle (request&, response&, log&) = 0; - }; -} - -#endif // WEB_MODULE_HXX diff --git a/web/server/apache/log.hxx b/web/server/apache/log.hxx new file mode 100644 index 0000000..f7738ef --- /dev/null +++ b/web/server/apache/log.hxx @@ -0,0 +1,80 @@ +// file : web/server/apache/log.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_LOG_HXX +#define WEB_SERVER_APACHE_LOG_HXX + +#include <httpd.h> // request_rec, server_rec +#include <http_log.h> +#include <http_config.h> // module + +#include <cstdint> // uint64_t +#include <algorithm> // min() + +#include <web/server/module.hxx> + +namespace web +{ + namespace apache + { + class log: public web::log + { + public: + + log (server_rec* s, const ::module* m) noexcept + : server_ (s), module_ (m) {} + + virtual void + write (const char* msg) {write (APLOG_ERR, msg);} + + // Apache-specific interface. 
+ // + void + write (int level, const char* msg) const noexcept + { + write (nullptr, 0, nullptr, level, msg); + } + + void + write (const char* file, + std::uint64_t line, + const char* func, + int level, + const char* msg) const noexcept + { + if (file && *file) + file = nullptr; // Skip file/line placeholder from log line. + + level = std::min (level, APLOG_TRACE8); + + if (func) + ap_log_error (file, + line, + module_->module_index, + level, + 0, + server_, + "[%s]: %s", + func, + msg); + else + // Skip function name placeholder from log line. + // + ap_log_error (file, + line, + module_->module_index, + level, + 0, + server_, + ": %s", + msg); + } + + private: + server_rec* server_; + const ::module* module_; // Apache module. + }; + } +} + +#endif // WEB_SERVER_APACHE_LOG_HXX diff --git a/web/server/apache/request.cxx b/web/server/apache/request.cxx new file mode 100644 index 0000000..a413081 --- /dev/null +++ b/web/server/apache/request.cxx @@ -0,0 +1,1005 @@ +// file : web/server/apache/request.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/server/apache/request.hxx> + +#include <apr.h> // APR_SIZE_MAX +#include <apr_errno.h> // apr_status_t, APR_SUCCESS, APR_E*, apr_strerror() +#include <apr_tables.h> // apr_table_*, apr_table_*(), apr_array_header_t +#include <apr_strings.h> // apr_pstrdup() +#include <apr_buckets.h> // apr_bucket*, apr_bucket_*(), apr_brigade_*(), + // APR_BRIGADE_*() + +#include <httpd.h> // request_rec, HTTP_*, OK +#include <http_protocol.h> // ap_*() + +#include <apreq2/apreq.h> // APREQ_* +#include <apreq2/apreq_util.h> // apreq_brigade_copy() +#include <apreq2/apreq_param.h> // apreq_param_t, apreq_value_to_param() +#include <apreq2/apreq_parser.h> // apreq_parser_t, apreq_parser_make() + +#include <ctime> // strftime(), time_t +#include <vector> +#include <chrono> +#include <memory> // unique_ptr +#include <string> +#include <cassert> +#include <ostream> +#include <istream> +#include <cstring> // 
str*(), memcpy(), size_t +#include <utility> // move() +#include <iterator> // istreambuf_iterator +#include <stdexcept> // invalid_argument, runtime_error +#include <exception> // current_exception() +#include <streambuf> +#include <algorithm> // min() + +#include <libbutl/utility.mxx> // icasecmp() +#include <libbutl/optional.mxx> +#include <libbutl/timestamp.mxx> + +#include <web/server/mime-url-encoding.hxx> + +using namespace std; +using namespace butl; + +namespace web +{ + namespace apache + { + [[noreturn]] static void + throw_internal_error (apr_status_t s, const string& what) + { + char buf[1024]; + throw runtime_error (what + ": " + apr_strerror (s, buf, sizeof (buf))); + } + + // Extend the Apache stream with checking for the read limit and caching + // the content if requested. Replay the cached content after rewind. + // + class istreambuf_cache: public istreambuf + { + enum class mode + { + cache, // Read from Apache stream, save the read data into the cache. + replay, // Read from the cache. + proxy // Read from Apache stream (don't save into the cache). + }; + + public: + istreambuf_cache (size_t read_limit, size_t cache_limit, + request_rec* r, + stream_state& s, + size_t bufsize = 1024, + size_t putback = 1) + : istreambuf (r, s, bufsize, putback), + read_limit_ (read_limit), + cache_limit_ (cache_limit) + { + } + + void + rewind () + { + // Fail if some content is already missed in the cache. + // + if (mode_ == mode::proxy) + throw sequence_error ( + string ("web::apache::istreambuf_cache::rewind: ") + + (cache_limit_ > 0 + ? "half-buffered" + : "unbuffered")); + + mode_ = mode::replay; + replay_pos_ = 0; + setg (nullptr, nullptr, nullptr); + } + + void + limits (size_t read_limit, size_t cache_limit) + { + if (read_limit > 0) + read_limit_ = read_limit; + + if (cache_limit > 0) + { + // We can not increase the cache limit if some content is already + // missed in the cache. 
+ // + if (cache_limit > cache_limit_ && mode_ == mode::proxy) + throw sequence_error ( + "web::apache::istreambuf_cache::limits: unbuffered"); + + cache_limit_ = cache_limit; + } + } + + size_t read_limit () const noexcept {return read_limit_;} + size_t cache_limit () const noexcept {return cache_limit_;} + + private: + virtual int_type + underflow (); + + private: + // Limits + // + size_t read_limit_; + size_t cache_limit_; + + // State + // + mode mode_ = mode::cache; + size_t read_bytes_ = 0; + bool eof_ = false; // End of Apache stream is reached. + + // Cache + // + struct chunk + { + vector<char> data; + size_t offset; + + chunk (vector<char>&& d, size_t o): data (move (d)), offset (o) {} + + // Make the type move constructible-only to avoid copying of chunks on + // vector growth. + // + chunk (chunk&&) = default; + }; + + vector<chunk> cache_; + size_t cache_size_ = 0; + size_t replay_pos_ = 0; + }; + + istreambuf_cache::int_type istreambuf_cache:: + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + if (mode_ == mode::replay) + { + if (replay_pos_ < cache_.size ()) + { + chunk& ch (cache_[replay_pos_++]); + char* p (ch.data.data ()); + setg (p, p + ch.offset, p + ch.data.size ()); + return traits_type::to_int_type (*gptr ()); + } + + // No more data to replay, so switch to the cache mode. That includes + // resetting eback, gptr and egptr, so they point into the istreambuf's + // internal buffer. Putback area should also be restored. + // + mode_ = mode::cache; + + // Bail out if the end of stream is reached. + // + if (eof_) + return traits_type::eof (); + + char* p (buf_.data () + putback_); + size_t pb (0); + + // Restore putback area if there is any cached data. Thanks to + // istreambuf, it's all in a single chunk. 
+ // + if (!cache_.empty ()) + { + chunk& ch (cache_.back ()); + pb = min (putback_, ch.data.size ()); + memcpy (p - pb, ch.data.data () + ch.data.size () - pb, pb); + } + + setg (p - pb, p, p); + } + + // Delegate reading to the base class in the cache or proxy modes, but + // check for the read limit first. + // + if (read_limit_ && read_bytes_ >= read_limit_) + throw invalid_request (HTTP_REQUEST_ENTITY_TOO_LARGE, + "payload too large"); + + // Throws the sequence_error exception if some unbuffered content is + // already written. + // + int_type r (istreambuf::underflow ()); + + if (r == traits_type::eof ()) + { + eof_ = true; + return r; + } + + // Increment the read bytes counter. + // + size_t rb (egptr () - gptr ()); + read_bytes_ += rb; + + // In the cache mode save the read data if the cache limit is not + // reached, otherwise switch to the proxy mode. + // + if (mode_ == mode::cache) + { + // Not to complicate things we will copy the buffer into the cache + // together with the putback area, which is OK as it usually takes a + // small fraction of the buffer. By the same reason we will cache the + // whole data read even though we can exceed the limits by + // bufsize - putback - 1 bytes. + // + if (cache_size_ < cache_limit_) + { + chunk ch (vector<char> (eback (), egptr ()), + static_cast<size_t> (gptr () - eback ())); + + cache_.emplace_back (move (ch)); + cache_size_ += rb; + } + else + mode_ = mode::proxy; + } + + return r; + } + + // Stream interface for reading from the Apache's bucket brigade. Put back + // is not supported. + // + // Note that reading from a brigade bucket modifies the brigade in the + // general case. For example, reading from a file bucket adds a new heap + // bucket before the file bucket on every read. Traversing/reading through + // such a bucket brigade effectively loads the whole file into the memory, + // so the subsequent brigade traversal results in iterating over the + // loaded heap buckets. 
+ // + // To avoid such a behavior we will make a shallow copy of the original + // bucket brigade, initially and for each rewind. Then, instead of + // iterating, we will always read from the first bucket removing it after + // the use. + // + class istreambuf_buckets: public streambuf + { + public: + // The bucket brigade must exist during the object's lifetime. + // + explicit + istreambuf_buckets (const apr_bucket_brigade* bs) + : orig_buckets_ (bs), + buckets_ (apr_brigade_create (bs->p, bs->bucket_alloc)) + + { + if (buckets_ == nullptr) + throw_internal_error (APR_ENOMEM, "apr_brigade_create"); + + rewind (); // Copy the original buckets. + } + + void + rewind () + { + // Note that apreq_brigade_copy() appends buckets to the destination, + // so we clean it up first. + // + apr_status_t r (apr_brigade_cleanup (buckets_.get ())); + if (r != APR_SUCCESS) + throw_internal_error (r, "apr_brigade_cleanup"); + + r = apreq_brigade_copy ( + buckets_.get (), + const_cast<apr_bucket_brigade*> (orig_buckets_)); + + if (r != APR_SUCCESS) + throw_internal_error (r, "apreq_brigade_copy"); + + setg (nullptr, nullptr, nullptr); + } + + private: + virtual int_type + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + // If the get-pointer is not NULL then it points to the data referred + // by the first brigade bucket. As we will bail out or rewrite such a + // pointer now there is no need for the bucket either, so we can + // safely delete it. + // + if (gptr () != nullptr) + { + assert (!APR_BRIGADE_EMPTY (buckets_)); + + // Note that apr_bucket_delete() is a macro and the following + // call ends up badly (with SIGSEGV). 
+ // + // apr_bucket_delete (APR_BRIGADE_FIRST (buckets_)); + // + apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); + apr_bucket_delete (b); + } + + if (APR_BRIGADE_EMPTY (buckets_)) + return traits_type::eof (); + + apr_size_t n; + const char* d; + apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); + apr_status_t r (apr_bucket_read (b, &d, &n, APR_BLOCK_READ)); + + if (r != APR_SUCCESS) + throw_internal_error (r, "apr_bucket_read"); + + char* p (const_cast<char*> (d)); + setg (p, p, p + n); + return traits_type::to_int_type (*gptr ()); + } + + private: + const apr_bucket_brigade* orig_buckets_; + + struct brigade_deleter + { + void operator() (apr_bucket_brigade* p) const + { + if (p != nullptr) + { + apr_status_t r (apr_brigade_destroy (p)); + + // Shouldn't fail unless something is severely damaged. + // + assert (r == APR_SUCCESS); + } + } + }; + + unique_ptr<apr_bucket_brigade, brigade_deleter> buckets_; + }; + + class istream_buckets_base + { + public: + explicit + istream_buckets_base (const apr_bucket_brigade* bs): buf_ (bs) {} + + protected: + istreambuf_buckets buf_; + }; + + class istream_buckets: public istream_buckets_base, public istream + { + public: + explicit + istream_buckets (const apr_bucket_brigade* bs) + // Note that calling dtor for istream object before init() is called + // is undefined behavior. That's the reason for inventing the + // istream_buckets_base class. + // + : istream_buckets_base (bs), istream (&buf_) + { + exceptions (failbit | badbit); + } + + void + rewind () + { + buf_.rewind (); + clear (); // Clears *bit flags (in particular eofbit). + } + }; + + // request + // + request:: + request (request_rec* rec) noexcept + : rec_ (rec) + { + rec_->status = HTTP_OK; + } + + request:: + ~request () + { + } + + void request:: + state (request_state s) + { + assert (s != request_state::initial); + + if (s == state_) + return; // Noop. + + if (s < state_) + { + // Can't "unwind" irrevocable interaction with Apache API. 
+ // + static const char* names[] = { + "initial", "reading", "headers", "writing"}; + + string str ("web::apache::request::set_state: "); + str += names[static_cast<size_t> (state_)]; + str += " to "; + str += names[static_cast<size_t> (s)]; + + throw sequence_error (move (str)); + } + + if (s == request_state::reading) + { + // Prepare request content for reading. + // + int r (ap_setup_client_block (rec_, REQUEST_CHUNKED_DECHUNK)); + + if (r != OK) + throw invalid_request (r); + } + else if (s > request_state::reading && state_ <= request_state::reading) + { + // Read request content if any, discard whatever is received. + // + int r (ap_discard_request_body (rec_)); + + if (r != OK) + throw invalid_request (r); + } + + state_ = s; + } + + void request:: + rewind () + { + // @@ Response cookies buffering is not supported yet. When done will be + // possible to rewind in broader range of cases. + // + if (state_ > request_state::reading) + throw sequence_error ("web::apache::request::rewind: unbuffered"); + + out_.reset (); + out_buf_.reset (); + + rec_->status = HTTP_OK; + + ap_set_content_type (rec_, nullptr); // Unset the output content type. + + // We don't need to rewind the input stream (which well may fail if + // unbuffered) if the form data is already read. + // + if (in_ != nullptr && form_data_ == nullptr) + { + assert (in_buf_ != nullptr); + + in_buf_->rewind (); // Throws if impossible to rewind. + in_->clear (); // Clears *bit flags (in particular eofbit). + } + + // Rewind uploaded file streams. + // + if (uploads_ != nullptr) + { + for (const unique_ptr<istream_buckets>& is: *uploads_) + { + if (is != nullptr) + is->rewind (); + } + } + } + + istream& request:: + content (size_t limit, size_t buffer) + { + // Create the input stream/streambuf if not present, otherwise adjust the + // limits. 
+ // + if (in_ == nullptr) + { + unique_ptr<istreambuf_cache> in_buf ( + new istreambuf_cache (limit, buffer, rec_, *this)); + + in_.reset (new istream (in_buf.get ())); + in_buf_ = move (in_buf); + in_->exceptions (istream::failbit | istream::badbit); + } + else + { + assert (in_buf_ != nullptr); + in_buf_->limits (limit, buffer); + } + + return *in_; + } + + const path& request:: + path () + { + if (path_.empty ()) + { + path_ = path_type (rec_->uri); // Is already URL-decoded. + + // Module request handler can not be called if URI is empty. + // + assert (!path_.empty ()); + } + + return path_; + } + + const name_values& request:: + parameters (size_t limit, bool url_only) + { + if (parameters_ == nullptr || url_only < url_only_parameters_) + { + try + { + if (parameters_ == nullptr) + { + parameters_.reset (new name_values ()); + parse_url_parameters (rec_->args); + } + + if (!url_only && form_data (limit)) + { + // After the form data is parsed we can clean it up for the + // application/x-www-form-urlencoded encoding but not for the + // multipart/form-data (see parse_multipart_parameters() for + // details). + // + if (form_multipart_) + parse_multipart_parameters (*form_data_); + else + { + // Make the character vector a NULL-terminated string. + // + form_data_->push_back ('\0'); + + parse_url_parameters (form_data_->data ()); + *form_data_ = vector<char> (); // Reset the cache. + } + } + } + catch (const invalid_argument&) + { + throw invalid_request (); + } + + url_only_parameters_ = url_only; + } + + return *parameters_; + } + + bool request:: + form_data (size_t limit) + { + if (form_data_ == nullptr) + { + form_data_.reset (new vector<char> ()); + + // We will not consider POST body as a form data if the request is in + // the reading or later state. 
+ // + if (rec_->method_number == M_POST && state_ < request_state::reading) + { + const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); + + if (ct != nullptr) + { + form_multipart_ = icasecmp ("multipart/form-data", ct, 19) == 0; + + if (form_multipart_ || + icasecmp ("application/x-www-form-urlencoded", ct, 33) == 0) + *form_data_ = vector<char> ( + istreambuf_iterator<char> (content (limit)), + istreambuf_iterator<char> ()); + } + } + } + + return !form_data_->empty (); + } + + void request:: + parse_url_parameters (const char* args) + { + assert (parameters_ != nullptr); + + for (auto n (args); n != nullptr; ) + { + const char* v (strchr (n, '=')); + const char* e (strchr (n, '&')); + + if (e != nullptr && e < v) + v = nullptr; + + string name (v != nullptr + ? mime_url_decode (n, v) : + (e + ? mime_url_decode (n, e) + : mime_url_decode (n, n + strlen (n)))); + + optional<string> value; + + if (v++) + value = e + ? mime_url_decode (v, e) + : mime_url_decode (v, v + strlen (v)); + + if (!name.empty () || value) + parameters_->emplace_back (move (name), move (value)); + + n = e ? e + 1 : nullptr; + } + } + + void request:: + parse_multipart_parameters (const vector<char>& body) + { + assert (parameters_ != nullptr && uploads_ == nullptr); + + auto throw_bad_request = [] (apr_status_t s, + status_code sc = HTTP_BAD_REQUEST) + { + char buf[1024]; + throw invalid_request (sc, apr_strerror (s, buf, sizeof (buf))); + }; + + // Create the file upload stream list, filling it with NULLs for the + // parameters parsed from the URL query part. + // + uploads_.reset ( + new vector<unique_ptr<istream_buckets>> (parameters_->size ())); + + // All the required objects (parser, input/output buckets, etc.) will be + // allocated in the request memory pool and so will have the HTTP + // request duration lifetime. + // + apr_pool_t* pool (rec_->pool); + + // Create the input bucket brigade containing a single bucket that + // references the form data. 
+ // + apr_bucket_alloc_t* ba (apr_bucket_alloc_create (pool)); + if (ba == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_alloc_create"); + + apr_bucket_brigade* bb (apr_brigade_create (pool, ba)); + if (bb == nullptr) + throw_internal_error (APR_ENOMEM, "apr_brigade_create"); + + apr_bucket* b ( + apr_bucket_immortal_create (body.data (), body.size (), ba)); + + if (b == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_immortal_create"); + + APR_BRIGADE_INSERT_TAIL (bb, b); + + if ((b = apr_bucket_eos_create (ba)) == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_eos_create"); + + APR_BRIGADE_INSERT_TAIL (bb, b); + + // Make sure that the parser will not swap the parsed data to disk + // passing the maximum possible value for the brigade limit. This way + // the resulting buckets will reference the form data directly, making + // no copies. This why we should not reset the form data cache after + // the parsing. + // + // Note that in future we may possibly setup the parser to read from the + // Apache internals directly and enable swapping the data to disk to + // minimize memory consumption. + // + apreq_parser_t* parser ( + apreq_parser_make (pool, + ba, + apr_table_get (rec_->headers_in, "Content-Type"), + apreq_parse_multipart, + APR_SIZE_MAX /* brigade_limit */, + nullptr /* temp_dir */, + nullptr /* hook */, + nullptr /* ctx */)); + + if (parser == nullptr) + throw_internal_error (APR_ENOMEM, "apreq_parser_make"); + + // Create the output table that will be filled with the parsed + // parameters. + // + apr_table_t* params (apr_table_make (pool, APREQ_DEFAULT_NELTS)); + if (params == nullptr) + throw_internal_error (APR_ENOMEM, "apr_table_make"); + + // Parse the form data. + // + apr_status_t r (apreq_parser_run (parser, params, bb)); + if (r != APR_SUCCESS) + throw_bad_request (r); + + // Fill the parameter and file upload stream lists. 
+ // + const apr_array_header_t* ps (apr_table_elts (params)); + size_t n (ps->nelts); + + for (auto p (reinterpret_cast<const apr_table_entry_t*> (ps->elts)); + n--; ++p) + { + assert (p->key != nullptr && p->val != nullptr); + + if (*p->key != '\0') + { + parameters_->emplace_back (p->key, optional<string> (p->val)); + + const apreq_param_t* ap (apreq_value_to_param (p->val)); + assert (ap != nullptr); // Must always be resolvable. + + uploads_->emplace_back (ap->upload != nullptr + ? new istream_buckets (ap->upload) + : nullptr); + } + } + } + + request::uploads_type& request:: + uploads () const + { + // The upload streams only become available once parameters() has parsed + // the request body, that is, was called in the non-URL-only mode. + // + if (parameters_ == nullptr || url_only_parameters_) + throw sequence_error ("web::apache::request::uploads"); + + // The upload list is only created when multipart form data is parsed. + // + if (uploads_ == nullptr) + throw invalid_argument ("no uploads"); + + assert (uploads_->size () == parameters_->size ()); + return *uploads_; + } + + istream& request:: + open_upload (size_t index) + { + uploads_type& us (uploads ()); + size_t n (us.size ()); + + if (index >= n) + throw invalid_argument ("invalid index"); + + const unique_ptr<istream_buckets>& is (us[index]); + + if (is == nullptr) + throw invalid_argument ("no upload"); + + return *is; + } + + istream& request:: + open_upload (const string& name) + { + uploads_type& us (uploads ()); + size_t n (us.size ()); + + istream* r (nullptr); + for (size_t i (0); i < n; ++i) + { + if ((*parameters_)[i].name == name) + { + istream* is (us[i].get ()); + + if (is != nullptr) + { + if (r != nullptr) + throw invalid_argument ("multiple uploads for '" + name + "'"); + + r = is; + } + } + } + + if (r == nullptr) + throw invalid_argument ("no upload"); + + return *r; + } + + const name_values& request:: + headers () + { + if (headers_ == nullptr) + { + headers_.reset (new name_values ()); + + const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); + size_t n (ha->nelts); + + headers_->reserve (n + 1); // One for the custom :Client-IP header.
+ + auto add = [this] (const char* n, const char* v) + { + assert (n != nullptr && v != nullptr); + headers_->emplace_back (n, optional<string> (v)); + }; + + for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts)); + n--; ++h) + add (h->key, h->val); + + assert (rec_->connection != nullptr); + + add (":Client-IP", rec_->connection->client_ip); + } + + return *headers_; + } + + const name_values& request:: + cookies () + { + if (cookies_ == nullptr) + { + cookies_.reset (new name_values ()); + + const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); + size_t n (ha->nelts); + + for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts)); + n--; ++h) + { + assert (h->key != nullptr); + + if (icasecmp (h->key, "Cookie") == 0) + { + for (const char* n (h->val); n != nullptr; ) + { + const char* v (strchr (n, '=')); + const char* e (strchr (n, ';')); + + if (e != nullptr && e < v) + v = nullptr; + + string name (v != nullptr + ? mime_url_decode (n, v, true) + : (e + ? mime_url_decode (n, e, true) + : mime_url_decode (n, n + strlen (n), true))); + + optional<string> value; + + if (v++) + value = e + ? mime_url_decode (v, e, true) + : mime_url_decode (v, v + strlen (v), true); + + if (!name.empty () || value) + cookies_->emplace_back (move (name), move (value)); + + n = e ? e + 1 : nullptr; + } + } + } + } + + return *cookies_; + } + + ostream& request:: + content (status_code status, const string& type, bool buffer) + { + if (out_ && + + // Same status code. + // + status == rec_->status && + + // Same buffering flag. + // + buffer == + (dynamic_cast<stringbuf*> (out_buf_.get ()) != nullptr) && + + // Same content type. + // + icasecmp (type, rec_->content_type ? rec_->content_type : "") == 0) + { + // No change, return the existing stream. 
+ // + return *out_; + } + + if (state_ >= request_state::writing) + throw sequence_error ("web::apache::request::content"); + + if (!buffer) + // Request body will be discarded prior first byte of content is + // written. Save form data now to make it available for future + // parameters() call. + // + // In the rare cases when the form data is expectedly bigger than 64K + // the client can always call parameters(limit) explicitly. + // + form_data (64 * 1024); + + unique_ptr<streambuf> out_buf ( + buffer + ? static_cast<streambuf*> (new stringbuf ()) + : static_cast<streambuf*> (new ostreambuf (rec_, *this))); + + out_.reset (new ostream (out_buf.get ())); + out_buf_ = move (out_buf); + out_->exceptions (ostream::eofbit | ostream::failbit | ostream::badbit); + + rec_->status = status; + + ap_set_content_type ( + rec_, + type.empty () ? nullptr : apr_pstrdup (rec_->pool, type.c_str ())); + + return *out_; + } + + void request:: + status (status_code status) + { + if (status != rec_->status) + { + // Setting status code in exception handler is a common usecase + // where no sense to throw but still need to signal apache a + // proper status code. + // + if (state_ >= request_state::writing && !current_exception ()) + throw sequence_error ("web::apache::request::status"); + + rec_->status = status; + out_.reset (); + out_buf_.reset (); + ap_set_content_type (rec_, nullptr); + } + } + + void request:: + cookie (const char* name, + const char* value, + const chrono::seconds* max_age, + const char* path, + const char* domain, + bool secure, + bool buffer) + { + assert (!buffer); // Cookie buffering is not implemented yet. + + string s (mime_url_encode (name)); + s += "="; + s += mime_url_encode (value); + + if (max_age) + { + timestamp tp (system_clock::now () + *max_age); + time_t t (system_clock::to_time_t (tp)); + + // Assume global locale is not changed and still "C". 
+ // + char b[100]; + strftime (b, sizeof (b), "%a, %d-%b-%Y %H:%M:%S GMT", gmtime (&t)); + s += "; Expires="; + s += b; + } + + if (path) + { + s += ";Path="; + s += path; + } + + if (domain) + { + s += ";Domain="; + s += domain; + } + + if (secure) + s += ";Secure"; + + state (request_state::headers); + apr_table_add (rec_->err_headers_out, "Set-Cookie", s.c_str ()); + } + } +} diff --git a/web/server/apache/request.hxx b/web/server/apache/request.hxx new file mode 100644 index 0000000..bc105ec --- /dev/null +++ b/web/server/apache/request.hxx @@ -0,0 +1,233 @@ +// file : web/server/apache/request.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_REQUEST_HXX +#define WEB_SERVER_APACHE_REQUEST_HXX + +#include <httpd.h> // request_rec, HTTP_*, OK, M_POST + +#include <chrono> +#include <memory> // unique_ptr +#include <string> +#include <vector> +#include <istream> +#include <ostream> +#include <streambuf> + +#include <web/server/module.hxx> +#include <web/server/apache/stream.hxx> + +namespace web +{ + namespace apache + { + // The state of the request processing, reflecting an interaction with + // Apache API (like reading/writing content function calls), with no + // buffering taken into account. Any state different from the initial + // suppose that some irrevocable interaction with Apache API have + // happened, so request processing should be either completed, or + // reported as failed. State values are ordered in a sense that the + // higher value reflects the more advanced stage of processing, so the + // request current state value may not decrease. + // + enum class request_state + { + // Denotes the initial stage of the request handling. At this stage + // the request line and headers are already parsed by Apache. + // + initial, + + // Reading the request content. + // + reading, + + // Adding the response headers (cookies in particular). + // + headers, + + // Writing the response content. 
+ // + writing + }; + + // Extends istreambuf with read limit checking, caching, etc. (see the + // implementation for details). + // + class istreambuf_cache; + + // Stream type for reading from Apache's bucket brigades. + // + class istream_buckets; + + class request: public web::request, + public web::response, + public stream_state + { + friend class service; + + // Can not be inline/default due to the member of + // unique_ptr<istreambuf_cache> type. Note that istreambuf_cache type is + // incomplete. + // + request (request_rec* rec) noexcept; + ~request (); + + request_state + state () const noexcept {return state_;} + + // Flush the buffered response content if present. The returned value + // should be passed to Apache API on request handler exit. + // + int + flush (); + + // Prepare for the request re-processing if possible (no unbuffered + // read/write operations have been done). Throw sequence_error + // otherwise. In particular, the preparation can include the response + // content buffer cleanup, the request content buffer rewind. + // + void + rewind (); + + // Get request path. + // + virtual const path_type& + path (); + + // Get request body data stream. + // + virtual std::istream& + content (std::size_t limit = 0, std::size_t buffer = 0); + + // Get request parameters. + // + virtual const name_values& + parameters (std::size_t limit, bool url_only = false); + + // Get upload stream. + // + virtual std::istream& + open_upload (std::size_t index); + + virtual std::istream& + open_upload (const std::string& name); + + // Get request headers. + // + virtual const name_values& + headers (); + + // Get request cookies. + // + virtual const name_values& + cookies (); + + // Get response status code. + // + status_code + status () const noexcept {return rec_->status;} + + // Set response status code. + // + virtual void + status (status_code status); + + // Set response status code, content type and get body stream. 
+ // + virtual std::ostream& + content (status_code status, + const std::string& type, + bool buffer = true); + + // Add response cookie. + // + virtual void + cookie (const char* name, + const char* value, + const std::chrono::seconds* max_age = nullptr, + const char* path = nullptr, + const char* domain = nullptr, + bool secure = false, + bool buffer = true); + + private: + // On the first call cache the application/x-www-form-urlencoded or + // multipart/form-data form data for the subsequent parameters parsing + // and set the multipart flag accordingly. Don't cache if the request is + // in the reading or later state. Return true if the cache contains the + // form data. + // + // Note that the function doesn't change the content buffering (see + // content() function for details) nor rewind the content stream after + // reading. + // + bool + form_data (std::size_t limit); + + // Used to also parse application/x-www-form-urlencoded POST body. + // + void + parse_url_parameters (const char* args); + + void + parse_multipart_parameters (const std::vector<char>& body); + + // Return a list of the upload input streams. Throw sequence_error if + // the parameters() function was not called yet. Throw invalid_argument + // if the request doesn't contain multipart form data. + // + using uploads_type = std::vector<std::unique_ptr<istream_buckets>>; + + uploads_type& + uploads () const; + + // Advance the request processing state. Noop if new state is equal to + // the current one. Throw sequence_error if the new state is less then + // the current one. Can throw invalid_request if HTTP request is + // malformed. + // + void + state (request_state); + + // stream_state members implementation. 
+ // + virtual void + set_read_state () {state (request_state::reading);} + + virtual void + set_write_state () {state (request_state::writing);} + + private: + request_rec* rec_; + request_state state_ = request_state::initial; + + path_type path_; + + std::unique_ptr<name_values> parameters_; + bool url_only_parameters_; // Meaningless if parameters_ is NULL; + + // Uploaded file streams. If not NULL, is parallel to the parameters + // list. + // + std::unique_ptr<uploads_type> uploads_; + + std::unique_ptr<name_values> headers_; + std::unique_ptr<name_values> cookies_; + + // Form data cache. Is empty if the body doesn't contain the form data. + // + std::unique_ptr<std::vector<char>> form_data_; + bool form_multipart_; // Meaningless if form_data_ is NULL or empty; + + std::unique_ptr<istreambuf_cache> in_buf_; + std::unique_ptr<std::istream> in_; + + std::unique_ptr<std::streambuf> out_buf_; + std::unique_ptr<std::ostream> out_; + }; + } +} + +#include <web/server/apache/request.ixx> + +#endif // WEB_SERVER_APACHE_REQUEST_HXX diff --git a/web/server/apache/request.ixx b/web/server/apache/request.ixx new file mode 100644 index 0000000..119fd2e --- /dev/null +++ b/web/server/apache/request.ixx @@ -0,0 +1,45 @@ +// file : web/server/apache/request.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <http_protocol.h> // ap_*() + +#include <sstream> // stringbuf + +namespace web +{ + namespace apache + { + inline int request:: + flush () + { + if (std::stringbuf* b = dynamic_cast<std::stringbuf*> (out_buf_.get ())) + { + // Response content is buffered. + // + std::string s (b->str ()); + + if (!s.empty ()) + { + try + { + state (request_state::writing); + + if (ap_rwrite (s.c_str (), s.length (), rec_) < 0) + rec_->status = HTTP_REQUEST_TIME_OUT; + } + catch (const invalid_request& e) + { + rec_->status = e.status; + } + } + + out_.reset (); + out_buf_.reset (); + } + + return rec_->status == HTTP_OK || state_ >= request_state::writing + ? 
OK + : rec_->status; + } + } +} diff --git a/web/server/apache/service.cxx b/web/server/apache/service.cxx new file mode 100644 index 0000000..9fb23da --- /dev/null +++ b/web/server/apache/service.cxx @@ -0,0 +1,268 @@ +// file : web/server/apache/service.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/server/apache/service.hxx> + +#include <apr_pools.h> // apr_palloc() + +#include <httpd.h> // server_rec +#include <http_config.h> // command_rec, cmd_*, ap_get_module_config() + +#include <memory> // unique_ptr +#include <string> +#include <cassert> +#include <utility> // move() +#include <cstring> // strlen(), strcmp() +#include <exception> + +#include <libbutl/utility.mxx> // function_cast() +#include <libbutl/optional.mxx> + +#include <web/server/module.hxx> +#include <web/server/apache/log.hxx> + +using namespace std; +using namespace butl; + +namespace web +{ + namespace apache + { + void service:: + init_directives () + { + assert (cmds == nullptr); + + // Fill apache module directive definitions. Directives share common + // name space in apache configuration file, so to prevent name clash + // have to form directive name as a combination of module and option + // names: <module name>-<option name>. This why for option bar of module + // foo the corresponding directive will appear in apache configuration + // file as foo-bar. + // + const option_descriptions& od (exemplar_.options ()); + unique_ptr<command_rec[]> directives (new command_rec[od.size () + 2]); + command_rec* d (directives.get ()); + + for (const auto& o: od) + { + auto i ( + option_descriptions_.emplace (name_ + "-" + o.first, o.second)); + assert (i.second); + + *d++ = + { + i.first->first.c_str (), + function_cast<cmd_func> (parse_option), + this, + + // Allow directives in both server and directory configuration + // scopes. + // + RSRC_CONF | ACCESS_CONF, + + // Move away from TAKE1 to be able to handle empty string and + // no-value. 
+ // + RAW_ARGS, + + nullptr + }; + } + + // Track if the handler is allowed to handle a request in the specific + // configuration scope. The handler exemplar will be created (and + // initialized) only for configuration contexts that have + // 'SetHandler <mod_name>' in effect for the corresponding scope. + // + *d++ = + { + "SetHandler", + function_cast<cmd_func> (parse_option), + this, + RSRC_CONF | ACCESS_CONF, + RAW_ARGS, + nullptr + }; + + *d = {nullptr, nullptr, nullptr, 0, RAW_ARGS, nullptr}; + cmds = directives.release (); + } + + void* service:: + create_server_context (apr_pool_t* pool, server_rec*) noexcept + { + // Create the object using the configuration memory pool provided by the + // Apache API. The lifetime of the object is equal to the lifetime of + // the pool. + // + void* p (apr_palloc (pool, sizeof (context))); + assert (p != nullptr); + return new (p) context (); + } + + void* service:: + create_dir_context (apr_pool_t* pool, char* dir) noexcept + { + // Create the object using the configuration memory pool provided by the + // Apache API. The lifetime of the object is equal to the lifetime of + // the pool. + // + void* p (apr_palloc (pool, sizeof (context))); + assert (p != nullptr); + + // For the user-defined directory configuration context dir is the path + // of the corresponding directive. For the special server directory + // invented by Apache for server scope directives, dir is NULL. + // + return new (p) context (dir == nullptr); + } + + const char* service:: + parse_option (cmd_parms* parms, void* conf, const char* args) noexcept + { + service& srv (*reinterpret_cast<service*> (parms->cmd->cmd_data)); + + if (srv.options_parsed_) + // Apache have started the second pass of its messy initialization + // cycle (more details at http://wiki.apache.org/httpd/ModuleLife). + // This time we are parsing for real. Cleanup the existing config, and + // start building the new one. 
+ // + srv.clear_config (); + + // 'args' is an optionally double-quoted string. It uses double quotes + // to distinguish empty string from no-value case. + // + assert (args != nullptr); + + optional<string> value; + if (auto l = strlen (args)) + value = l >= 2 && args[0] == '"' && args[l - 1] == '"' + ? string (args + 1, l - 2) + : args; + + // Determine the directory and server configuration contexts for the + // option. + // + context* dir_context (context_cast (conf)); + assert (dir_context != nullptr); + + server_rec* server (parms->server); + assert (server != nullptr); + assert (server->module_config != nullptr); + + context* srv_context ( + context_cast (ap_get_module_config (server->module_config, &srv))); + + assert (srv_context != nullptr); + + // Associate the directory configuration context with the enclosing + // server configuration context. + // + context*& s (dir_context->server); + if (s == nullptr) + s = srv_context; + else + assert (s == srv_context); + + // If the option appears in the special directory configuration context, + // add it to the enclosing server context instead. This way it will be + // possible to complement all server-enclosed contexts (including this + // special one) with the server scope options. + // + context* c (dir_context->special ? srv_context : dir_context); + + if (dir_context->special) + // + // Make sure the special directory context is also in the option lists + // map. Later the context will be populated with an enclosing server + // context options. + // + srv.options_.emplace (dir_context, name_values ()); + + const char* name (parms->cmd->name); + if (strcmp (name, "SetHandler") == 0) + { + // Keep track of a request handling allowability. + // + srv.options_.emplace (c, name_values ()).first->first->handling = + value && *value == srv.name_ + ? 
request_handling::allowed + : request_handling::disallowed; + + return 0; + } + + return srv.add_option (c, name, move (value)); + } + + const char* service:: + add_option (context* ctx, const char* name, optional<string> value) + { + auto i (option_descriptions_.find (name)); + assert (i != option_descriptions_.end ()); + + // Check that option value presence is expected. + // + if (i->second != static_cast<bool> (value)) + return value ? "unexpected value" : "value expected"; + + // Store the option under its own name, skipping the leading + // "<module name>-" part of the directive name. + // + options_[ctx].emplace_back (name + name_.length () + 1, move (value)); + return 0; + } + + void service:: + complement (context* enclosed, context* enclosing) + { + auto i (options_.find (enclosing)); + + // The enclosing context may have no options. It can be the context of a + // server that has no configuration directives in its immediate scope, + // but has ones in its enclosed scope (directory or virtual server). + // + if (i != options_.end ()) + { + const name_values& src (i->second); + name_values& dest (options_[enclosed]); + dest.insert (dest.begin (), src.begin (), src.end ()); + } + + if (enclosed->handling == request_handling::inherit) + enclosed->handling = enclosing->handling; + } + + void service:: + finalize_config (server_rec* s) + { + if (!version_logged_) + { + log l (s, this); + exemplar_.version (l); + version_logged_ = true; + } + + // Complement directory configuration contexts with options of the + // enclosing server configuration context. By this time virtual server + // contexts are already complemented with the main server configuration + // context options as a result of the merge_server_context() calls. + // + for (const auto& o: options_) + { + // Is a directory configuration context.
+ // + if (o.first->server != nullptr) + complement (o.first, o.first->server); + } + + options_parsed_ = true; + } + + void service:: + clear_config () + { + options_.clear (); + options_parsed_ = false; + } + } +} diff --git a/web/server/apache/service.hxx b/web/server/apache/service.hxx new file mode 100644 index 0000000..ad54d2c --- /dev/null +++ b/web/server/apache/service.hxx @@ -0,0 +1,333 @@ +// file : web/server/apache/service.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_SERVICE_HXX +#define WEB_SERVER_APACHE_SERVICE_HXX + +#include <apr_pools.h> // apr_pool_t +#include <apr_hooks.h> // APR_HOOK_* + +#include <httpd.h> // request_rec, server_rec, HTTP_*, DECLINED +#include <http_config.h> // module, cmd_parms, ap_hook_*() + +#include <map> +#include <memory> // unique_ptr +#include <string> +#include <cassert> + +#include <web/server/module.hxx> +#include <web/server/apache/log.hxx> +#include <web/server/apache/request.hxx> + +namespace web +{ + namespace apache + { + // Apache has 3 configuration scopes: main server, virtual server, and + // directory (location). It provides configuration scope-aware modules + // with the ability to build a hierarchy of configuration contexts. Later, + // when processing a request, Apache passes the appropriate directory + // configuration context to the request handler. + // + // This Apache service implementation first makes a copy of the provided + // (in the constructor below) handler exemplar for each directory context. + // It then initializes each of these "context exemplars" with the (merged) + // set of configuration options. Finally, when handling a request, it + // copies the corresponding "context exemplar" to create the "handling + // instance". Note that the "context exemplars" are created as a copy of + // the provided exemplar, which is never initialized. 
As a result, it is + // possible to detect if the handler's copy constructor is used to create + // a "context exemplar" or a "handling instance". + // + class service: ::module + { + public: + // Note that the module exemplar is stored by-reference. + // + template <typename H> + service (const std::string& name, H& exemplar) + : ::module + { + STANDARD20_MODULE_STUFF, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &register_hooks<H> + +#ifdef AP_MODULE_HAS_FLAGS + , AP_MODULE_FLAG_NONE +#endif + }, + name_ (name), + exemplar_ (exemplar) + { + init_directives (); + + // Set configuration context management hooks. + // + // The overall process of building the configuration hierarchy for a + // handler is as follows: + // + // 1. Apache creates directory and server configuration contexts for + // scopes containing handler-defined directives by calling the + // create_{server,dir}_context() callback functions. For directives + // at the server scope the special directory context is created as + // well. + // + // 2. Apache calls parse_option() function for each handler-defined + // directive. The function parses the directives and places the + // resulting options into the corresponding configuration context. + // It also establishes the directory-server contexts relations. + // + // 3. Apache calls merge_server_context() function for each virtual + // server. The function complements virtual server context options + // with the ones from the main server. + // + // 4. Apache calls config_finalizer() which complements the directory + // contexts options with the ones from the enclosing servers. + // + // 5. Apache calls worker_initializer() which creates handler exemplar + // for each directory configuration context that has + // 'SetHandler <mod_name>' directive in effect for it.
+ // + // References: + // http://www.apachetutor.org/dev/config + // http://httpd.apache.org/docs/2.4/developer/modguide.html + // http://wiki.apache.org/httpd/ModuleLife + // + create_server_config = &create_server_context; + create_dir_config = &create_dir_context; + merge_server_config = &merge_server_context<H>; + + // instance<H> () is invented to delegate processing from apache + // request handler C function to the service non-static member + // function. This approach restricts the number of service objects per + // specific handler implementation class to just one instance. + // + service*& srv (instance<H> ()); + assert (srv == nullptr); + srv = this; + } + + ~service () + { + delete [] cmds; + } + + private: + template <typename H> + static service*& + instance () noexcept + { + static service* instance; + return instance; + } + + template <typename H> + static void + register_hooks (apr_pool_t*) noexcept + { + // The config_finalizer() function is called at the end of Apache + // server configuration parsing. + // + ap_hook_post_config (&config_finalizer<H>, NULL, NULL, APR_HOOK_LAST); + + // The worker_initializer() function is called right after Apache + // worker process is started. Called for every new process spawned. + // + ap_hook_child_init ( + &worker_initializer<H>, NULL, NULL, APR_HOOK_LAST); + + // The request_handler () function is called for each client request.
+ // + ap_hook_handler (&request_handler<H>, NULL, NULL, APR_HOOK_LAST); + } + + template <typename H> + static int + config_finalizer (apr_pool_t*, apr_pool_t*, apr_pool_t*, server_rec* s) + noexcept + { + instance<H> ()->finalize_config (s); + return OK; + } + + template <typename H> + static void + worker_initializer (apr_pool_t*, server_rec* s) noexcept + { + auto srv (instance<H> ()); + log l (s, srv); + srv->template init_worker<H> (l); + } + + template <typename H> + static int + request_handler (request_rec* r) noexcept; + + private: + + // Reflects the allowability of the request handling in the specific + // configuration scope. + // + enum class request_handling + { + // Configuration scope has 'SetHandler <mod_name>' directive + // specified. The handler is allowed to handle a request in the scope. + // + allowed, + + // Configuration scope has 'SetHandler <other_mod_name>|None' + // directive specified. The handler is disallowed to handle a request + // in the scope. + // + disallowed, + + // + // Note that if there are several SetHandler directives specified + // in the specific scope, then the latest one takes the precedence. + + // Configuration scope has no SetHandler directive specified. The + // request handling allowability is established by the enclosing + // scopes. + // + inherit + }; + + // Our representation of the Apache configuration context. + // + // The lifetime of this object is under the control of the Apache API, + // which treats it as a raw sequence of bytes. In order not to tinker + // with the C-style structures and APR memory pools, we will keep it a + // (C++11) POD type with just the members required to maintain the + // context hierarchy. + // + // We will then use the pointers to these context objects as keys in + // maps to (1) the corresponding application-level option lists during + // the configuration cycle and to (2) the corresponding handler exemplar + // during the HTTP request handling phase. 
We will also use the same + // type for both directory and server configuration contexts. + // + struct context + { + // Outer (server) configuration context for the directory + // configuration context, NULL otherwise. + // + context* server = nullptr; + + // If module directives appear directly in the server configuration + // scope, Apache creates a special directory context for them. This + // context appears at the same hierarchy level as the user-defined + // directory contexts of the same server scope. + // + bool special; + + // Request handling allowability for the corresponding configuration + // scope. + // + request_handling handling = request_handling::inherit; + + // Create the server configuration context. + // + context (): special (false) {} + + // Create the directory configuration context. Due to the Apache API + // implementation details it is not possible to detect the enclosing + // server configuration context at the time of directory context + // creation. As a result, the server member is set by the module's + // parse_option() function. + // + context (bool s): special (s) {} + + // Ensure the object is only destroyed by Apache. + // + ~context () = delete; + }; + + static context* + context_cast (void* config) noexcept + {return static_cast<context*> (config);} + + private: + void + init_directives (); + + // Create the server configuration context. Called by the Apache API + // whenever a new object of that type is required. + // + static void* + create_server_context (apr_pool_t*, server_rec*) noexcept; + + // Create the server directory configuration context. Called by the + // Apache API whenever a new object of that type is required. 
+ // + static void* + create_dir_context (apr_pool_t*, char* dir) noexcept; + + template <typename H> + static void* + merge_server_context (apr_pool_t*, void* enclosing, void* enclosed) + noexcept + { + instance<H> ()->complement ( + context_cast (enclosed), context_cast (enclosing)); + + return enclosed; + } + + static const char* + parse_option (cmd_parms* parms, void* conf, const char* args) noexcept; + + const char* + add_option (context*, const char* name, optional<std::string> value); + + void + finalize_config (server_rec*); + + void + clear_config (); + + // Complement the enclosed context with options of the enclosing one. + // If the 'handling' member of the enclosed context is set to + // request_handling::inherit value, assign it a value from the enclosing + // context. + // + void + complement (context* enclosed, context* enclosing); + + template <typename H> + void + init_worker (log&); + + template <typename H> + int + handle (request&, const context*, log&) const; + + private: + std::string name_; + handler& exemplar_; + option_descriptions option_descriptions_; + + // The context objects pointed to by the key can change during the + // configuration phase. + // + using options = std::map<context*, name_values>; + options options_; + + // The context objects pointed to by the key can not change during the + // request handling phase. 
+ // + using exemplars = std::map<const context*, std::unique_ptr<handler>>; + exemplars exemplars_; + + bool options_parsed_ = false; + bool version_logged_ = false; + }; + } +} + +#include <web/server/apache/service.txx> + +#endif // WEB_SERVER_APACHE_SERVICE_HXX diff --git a/web/server/apache/service.txx b/web/server/apache/service.txx new file mode 100644 index 0000000..1b16d0b --- /dev/null +++ b/web/server/apache/service.txx @@ -0,0 +1,213 @@ +// file : web/server/apache/service.txx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <httpd.h> // APEXIT_CHILDSICK +#include <http_log.h> // APLOG_* + +#include <cstdlib> // exit() +#include <utility> // move() +#include <exception> + +#include <libbutl/utility.mxx> // operator<<(ostream, exception) + +namespace web +{ + namespace apache + { + template <typename H> + void service:: + init_worker (log& l) + { + using namespace std; + + const string func_name ( + "web::apache::service<" + name_ + ">::init_worker"); + + try + { + const H* exemplar (dynamic_cast<const H*> (&exemplar_)); + assert (exemplar != nullptr); + + // For each directory configuration context, for which the handler is + // allowed to handle a request, create the handler exemplar as a deep + // copy of the exemplar_ member, and initialize it with the + // context-specific option list. + // + for (const auto& o: options_) + { + const context* c (o.first); + + if (c->server != nullptr && // Is a directory configuration context. + c->handling == request_handling::allowed) + { + auto r ( + exemplars_.emplace ( + c, + unique_ptr<handler> (new H (*exemplar)))); + + r.first->second->init (o.second, l); + } + } + + // Options are not needed anymore. Free up the space. + // + options_.clear (); + } + catch (const exception& e) + { + l.write (nullptr, 0, func_name.c_str (), APLOG_EMERG, e.what ()); + + // Terminate the worker apache process. 
APEXIT_CHILDSICK indicates to + // the root process that the worker has exited due to a resource + // shortage. In this case the root process limits the rate of forking + // until the situation is resolved. + // + // If the root process fails to create any worker process on startup, + // the behaviour depends on the Multi-Processing Module enabled. For + // mpm_worker_module and mpm_event_module the root process terminates. + // For mpm_prefork_module it keeps trying to create the worker process + // at one-second intervals. + // + // If the root process loses all its workers while running (for + // example due to the MaxRequestsPerChild directive), and fails to + // create any new ones, it keeps trying to create the worker process + // at one-second intervals. + // + exit (APEXIT_CHILDSICK); + } + catch (...) + { + l.write (nullptr, + 0, + func_name.c_str (), + APLOG_EMERG, + "unknown error"); + + // Terminate the worker apache process. + // + exit (APEXIT_CHILDSICK); + } + } + + template <typename H> + int service:: + request_handler (request_rec* r) noexcept + { + auto srv (instance<H> ()); + if (!r->handler || srv->name_ != r->handler) return DECLINED; + + assert (r->per_dir_config != nullptr); + + // Obtain the request-associated configuration context.
+ // + const context* cx ( + context_cast (ap_get_module_config (r->per_dir_config, srv))); + + assert (cx != nullptr); + + request rq (r); + log lg (r->server, srv); + return srv->template handle<H> (rq, cx, lg); + } + + template <typename H> + int service:: + handle (request& rq, const context* cx, log& lg) const + { + using namespace std; + + static const string func_name ( + "web::apache::service<" + name_ + ">::handle"); + + try + { + auto i (exemplars_.find (cx)); + assert (i != exemplars_.end ()); + + const H* e (dynamic_cast<const H*> (i->second.get ())); + assert (e != nullptr); + + for (H h (*e);;) + { + try + { + if (static_cast<handler&> (h).handle (rq, rq, lg)) + return rq.flush (); + + if (rq.state () == request_state::initial) + return DECLINED; + + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, + "handling declined being partially executed"); + break; + } + catch (const handler::retry&) + { + // Retry to handle the request. + // + rq.rewind (); + } + } + } + catch (const invalid_request& e) + { + if (!e.content.empty () && rq.state () < request_state::writing) + { + try + { + rq.content (e.status, e.type) << e.content << endl; + return rq.flush (); + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + } + } + + return e.status; + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + + if (*e.what () && rq.state () < request_state::writing) + { + try + { + rq.content ( + HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") + << e << endl; + + return rq.flush (); + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + } + } + } + catch (...) 
+ { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, "unknown error"); + + if (rq.state () < request_state::writing) + { + try + { + rq.content ( + HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") + << "unknown error" << endl; + + return rq.flush (); + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + } + } + } + + return HTTP_INTERNAL_SERVER_ERROR; + } + } +} diff --git a/web/server/apache/stream.hxx b/web/server/apache/stream.hxx new file mode 100644 index 0000000..77145af --- /dev/null +++ b/web/server/apache/stream.hxx @@ -0,0 +1,148 @@ +// file : web/server/apache/stream.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_STREAM_HXX +#define WEB_SERVER_APACHE_STREAM_HXX + +#include <httpd.h> // request_rec, HTTP_* +#include <http_protocol.h> // ap_*() + +#include <ios> // streamsize +#include <vector> +#include <cstring> // memmove(), size_t +#include <streambuf> +#include <algorithm> // min(), max() + +#include <web/server/module.hxx> // invalid_request + +namespace web +{ + namespace apache + { + // Object of a class implementing this interface is intended for keeping + // the state of communication with the client. + // + struct stream_state + { + // Called by istreambuf functions when content is about to be read from + // the client. Can throw invalid_request or sequence_error. + // + virtual void + set_read_state () = 0; + + // Called by ostreambuf functions when some content is about to be + // written to the client. Can throw invalid_request or sequence_error. + // + virtual void + set_write_state () = 0; + }; + + // Base class for ostreambuf and istreambuf. References request and + // communication state structures. 
+ // + class rbuf: public std::streambuf + { + protected: + rbuf (request_rec* r, stream_state& s): rec_ (r), state_ (s) {} + + protected: + request_rec* rec_; + stream_state& state_; + }; + + class ostreambuf: public rbuf + { + public: + ostreambuf (request_rec* r, stream_state& s): rbuf (r, s) {} + + private: + virtual int_type + overflow (int_type c) + { + if (c != traits_type::eof ()) + { + state_.set_write_state (); + + char chr (c); + + // Throwing allows us to distinguish a communication failure from + // other IO error conditions. + // + if (ap_rwrite (&chr, sizeof (chr), rec_) == -1) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + } + + return c; + } + + virtual std::streamsize + xsputn (const char* s, std::streamsize num) + { + state_.set_write_state (); + + if (ap_rwrite (s, num, rec_) < 0) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + + return num; + } + + virtual int + sync () + { + if (ap_rflush (rec_) < 0) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + + return 0; + } + }; + + class istreambuf: public rbuf + { + public: + istreambuf (request_rec* r, + stream_state& s, + size_t bufsize = 1024, + size_t putback = 1) + : rbuf (r, s), + bufsize_ (std::max (bufsize, (size_t)1)), + putback_ (std::min (putback, bufsize_ - 1)), + buf_ (bufsize_) + { + char* p (buf_.data () + putback_); + setg (p, p, p); + } + + protected: + virtual int_type + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + state_.set_read_state (); + + size_t pb (std::min ((size_t)(gptr () - eback ()), putback_)); + std::memmove (buf_.data () + putback_ - pb, gptr () - pb, pb); + + char* p (buf_.data () + putback_); + int rb (ap_get_client_block (rec_, p, bufsize_ - putback_)); + + if (rb == 0) + return traits_type::eof (); + + if (rb < 0) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + + setg (p - pb, p, p + rb); + return traits_type::to_int_type (*gptr ()); + } + + protected: + size_t bufsize_; + size_t putback_; + std::vector<char> buf_; + };
+ } +} + +#endif // WEB_SERVER_APACHE_STREAM_HXX diff --git a/web/server/buildfile b/web/server/buildfile new file mode 100644 index 0000000..26de70f --- /dev/null +++ b/web/server/buildfile @@ -0,0 +1,15 @@ +# file : web/server/buildfile +# license : MIT; see accompanying LICENSE file + +# This is currently part of the brep apache module but lives in a separate +# directory. Thus the strange choices: libus{}, no header installation, etc. + +# While we don't need to link to APR, we need to find its header location. +# +import libs = libapr1%lib{apr-1} +import libs += libapreq2%lib{apreq2} +import libs += libbutl%lib{butl} + +libus{web-server}: {hxx ixx txx cxx}{**} $libs + +{hxx ixx txx}{*}: install = false diff --git a/web/server/mime-url-encoding.cxx b/web/server/mime-url-encoding.cxx new file mode 100644 index 0000000..fd1e4e8 --- /dev/null +++ b/web/server/mime-url-encoding.cxx @@ -0,0 +1,66 @@ +// file : web/server/mime-url-encoding.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/server/mime-url-encoding.hxx> + +#include <string> +#include <iterator> // back_inserter + +#include <libbutl/url.mxx> + +using namespace std; +using namespace butl; + +namespace web +{ + inline static bool + encode_query (char& c) + { + if (c == ' ') + { + c = '+'; + return false; + } + + return !url::unreserved (c); + } + + string + mime_url_encode (const char* v, bool query) + { + return query ? url::encode (v, encode_query) : url::encode (v); + } + + string + mime_url_encode (const string& v, bool query) + { + return query ? 
url::encode (v, encode_query) : url::encode (v); + } + + string + mime_url_decode (const char* b, const char* e, bool trim, bool query) + { + if (trim) + { + for (; b != e && *b == ' '; ++b) ; + + if (b == e) + return string (); + + while (*--e == ' '); + ++e; + } + + string r; + if (!query) + url::decode (b, e, back_inserter (r)); + else + url::decode (b, e, back_inserter (r), + [] (char& c) + { + if (c == '+') + c = ' '; + }); + return r; + } +} diff --git a/web/server/mime-url-encoding.hxx b/web/server/mime-url-encoding.hxx new file mode 100644 index 0000000..34172a4 --- /dev/null +++ b/web/server/mime-url-encoding.hxx @@ -0,0 +1,32 @@ +// file : web/server/mime-url-encoding.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_MIME_URL_ENCODING_HXX +#define WEB_SERVER_MIME_URL_ENCODING_HXX + +#include <string> + +namespace web +{ + // URL-encode characters other than unreserved (see RFC3986). If the query + // flag is true, then the encoding is applied to the URL query part, and so + // convert space characters to plus characters rather than percent-encode + // them. + // + std::string + mime_url_encode (const char*, bool query = true); + + std::string + mime_url_encode (const std::string&, bool query = true); + + // If the query flag is true, then convert plus characters to space + // characters (see above). Throw std::invalid_argument if an invalid encoding + // sequence is encountered. 
+ // + std::string + mime_url_decode (const char* b, const char* e, + bool trim = false, + bool query = true); +} + +#endif // WEB_SERVER_MIME_URL_ENCODING_HXX diff --git a/web/server/module.hxx b/web/server/module.hxx new file mode 100644 index 0000000..beda73c --- /dev/null +++ b/web/server/module.hxx @@ -0,0 +1,299 @@ +// file : web/server/module.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_MODULE_HXX +#define WEB_SERVER_MODULE_HXX + +#include <map> +#include <string> +#include <vector> +#include <iosfwd> +#include <chrono> +#include <cstdint> // uint16_t +#include <cstddef> // size_t +#include <utility> // move() +#include <stdexcept> // runtime_error + +#include <libbutl/path.mxx> +#include <libbutl/optional.mxx> + +namespace web +{ + using butl::optional; + + // HTTP status code. + // + // @@ Define some commonly used constants? + // + using status_code = std::uint16_t; + + // This exception is used to signal that the request is invalid + // (4XX codes) rather than that it could not be processed (5XX). + // By default 400 is returned, which means the request is malformed. + // + // If caught by the web server implementation, it will try to return + // the specified status and content to the client, if possible. + // This, however, may not be possible if some unbuffered content has + // already been written. The behavior in this case is implementation- + // specific and may result in no indication of an error being sent to + // the client. + // + struct invalid_request + { + status_code status; + std::string content; + std::string type; + + //@@ Maybe optional "try again" link? + // + invalid_request (status_code s = 400, + std::string c = "", + std::string t = "text/plain;charset=utf-8") + : status (s), content (std::move (c)), type (std::move (t)) {} + }; + + // Exception indicating HTTP request/response sequencing error.
+ // For example, trying to change the status code after some + // content has already been written. + // + struct sequence_error: std::runtime_error + { + sequence_error (std::string d): std::runtime_error (std::move (d)) {} + }; + + // Map of handler configuration option names to the boolean flag indicating + // whether the value is expected for the option. + // + using option_descriptions = std::map<std::string, bool>; + + struct name_value + { + // These should eventually become string_view's. + // + std::string name; + optional<std::string> value; + + name_value () {} + name_value (std::string n, optional<std::string> v) + : name (std::move (n)), value (std::move (v)) {} + }; + + using name_values = std::vector<name_value>; + using butl::path; + + class request + { + public: + using path_type = web::path; + + virtual + ~request () = default; + + // Corresponds to abs_path portion of HTTP URL as described in "3.2.2 HTTP + // URL" of http://tools.ietf.org/html/rfc2616. Returns '/' if no abs_path + // is present in URL. + // + virtual const path_type& + path () = 0; + + //@@ Why not pass parameters directly? Lazy parsing? + //@@ Why not have something like operator[] for lookup? Probably + // in name_values. + //@@ Maybe parameter_list() and parameter_map()? + // + // Parse parameters from the URL query part and from the HTTP POST request + // body for the application/x-www-form-urlencoded or multipart/form-data + // content type. Optionally limit the amount of data read from the body + // (see the content() function for the semantics). Throw invalid_request + // if parameters decoding fails. + // + virtual const name_values& + parameters (std::size_t limit, bool url_only = false) = 0; + + // Open the input stream for the upload corresponding to the specified + // parameter index. Must be called after the parameters() function is + // called, throw sequence_error if that's not the case. 
Throw + // invalid_argument if the index doesn't have an upload (for example, + // because the parameter is not <input type="file"/> form field). + // + // Note also that reopening the same upload (within the same retry) + // returns the same stream reference. + // + virtual std::istream& + open_upload (std::size_t index) = 0; + + // As above but specify the parameter by name. Throw invalid_argument if + // there are multiple uploads for this parameter name. + // + virtual std::istream& + open_upload (const std::string& name) = 0; + + // Request headers. + // + // The implementation may add custom pseudo-headers reflecting additional + // request options. Such headers should start with ':'. If possible, the + // implementation should add the following well-known pseudo-headers: + // + // :Client-IP - IP address of the connecting client. + // + virtual const name_values& + headers () = 0; + + // Throw invalid_request if cookies are malformed. + // + virtual const name_values& + cookies () = 0; + + // Get the stream to read the request content from. If the limit argument + // is zero, then the content limit is left unchanged (unlimited initially). + // Otherwise the requested limit is set, and the invalid_request exception + // with the code 413 (payload too large) will be thrown when the specified + // limit is reached while reading from the stream. If the buffer argument + // is zero, then the buffer size is left unchanged (zero initially). If it + // is impossible to increase the buffer size (because, for example, some + // content is already read unbuffered), then the sequence_error is thrown. + // + // Note that unread input content is discarded when any unbuffered content + // is written, and any attempt to read it will result in the + // sequence_error exception being thrown. 
+ // + virtual std::istream& + content (std::size_t limit, std::size_t buffer = 0) = 0; + }; + + class response + { + public: + virtual + ~response () = default; + + // Set status code, content type, and get the stream to write + // the content to. If the buffer argument is true (default), + // then buffer the entire content before sending it as a + // response. This allows us to change the status code in + // case of an error. + // + // Specifically, if there is already content in the buffer + // and the status code is changed, then the old content is + // discarded. If the content was not buffered and the status + // is changed, then the sequence_error exception is thrown. + // If this exception leaves handler::handle(), then the + // implementation shall terminate the response in a suitable + // but unspecified manner. In particular, there is no guarantee + // that the user will be notified of an error or observe the + // new status. + // + virtual std::ostream& + content (status_code code = 200, + const std::string& type = "application/xhtml+xml;charset=utf-8", + bool buffer = true) = 0; + + // Set status code without writing any content. On status change, + // discard buffered content or throw sequence_error if content was + // not buffered. + // + virtual void + status (status_code) = 0; + + // Throw sequence_error if some unbuffered content has already + // been written. + // + virtual void + cookie (const char* name, + const char* value, + const std::chrono::seconds* max_age = nullptr, + const char* path = nullptr, + const char* domain = nullptr, + bool secure = false, + bool buffer = true) = 0; + }; + + // A web server logging backend. The handler can use it to log + // diagnostics that is meant for the web server operator rather + // than the user. + // + // The handler can cast this basic interface to the web server's + // specific implementation that may provide a richer interface. 
+ // + class log + { + public: + virtual + ~log () = default; + + virtual void + write (const char* msg) = 0; + }; + + // The web server creates a new handler instance for each request + // by copy-initializing it with the handler exemplar. This way we + // achieve two things: we can freely use handler data members + // without worrying about multi-threading issues and we + // automatically get started with the initial state for each + // request. If you really need to share some rw-data between + // all the handlers, use static data members with appropriate + // locking. See the <service> header in one of the web server + // directories (e.g., apache/) if you need to see the code that + // does this. + // + class handler + { + public: + virtual + ~handler () = default; + + // Description of configuration options supported by this handler. Note: + // should be callable during static initialization. + // + virtual option_descriptions + options () = 0; + + // During startup the web server calls this function on the handler + // exemplar to log the handler version information. It is up to the web + // server whether to call this function once per handler implementation + // type. Therefore, it is expected that this function will log the same + // information for all the handler exemplars. + // + virtual void + version (log&) = 0; + + // During startup the web server calls this function on the handler + // exemplar passing a list of configuration options. The place these + // configuration options come from is implementation-specific (normally + // a configuration file). The web server guarantees that only options + // listed in the map returned by the options() function above can be + // present. Any exception thrown by this function terminates the web + // server. + // + virtual void + init (const name_values&, log&) = 0; + + // Return false if decline to handle the request. 
If handling has been + // declined after any unbuffered content has been written, then the + // implementation shall terminate the response in a suitable but + // unspecified manner. + // + // Throw retry if need to retry handling the request. The retry will + // happen on the same instance of the handler and the implementation is + // expected to "rewind" the request and response objects to their initial + // state. This is only guaranteed to be possible if the relevant functions + // in the request and response objects were called in buffered mode (the + // buffer argument was true). + // + // Any exception other than retry and invalid_request described above that + // leaves this function is treated by the web server implementation as an + // internal server error (500). Similar to invalid_request, it will try to + // return the status and description (obtained by calling what() on + // std::exception) to the client, if possible. The description is assumed + // to be encoded in UTF-8. The implementation may provide a configuration + // option to omit the description from the response, for security/privacy + // reasons.
+ // + struct retry {}; + + virtual bool + handle (request&, response&, log&) = 0; + }; +} + +#endif // WEB_SERVER_MODULE_HXX diff --git a/web/version.hxx.in b/web/version.hxx.in deleted file mode 100644 index ba51d44..0000000 --- a/web/version.hxx.in +++ /dev/null @@ -1,11 +0,0 @@ -// file : web/version.hxx.in -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_VERSION_HXX_IN -#define WEB_VERSION_HXX_IN - -#include <libstudxml/version.hxx> - -$libstudxml.check(LIBSTUDXML_VERSION, LIBSTUDXML_SNAPSHOT)$ - -#endif // WEB_VERSION_HXX_IN diff --git a/web/xhtml-fragment.cxx b/web/xhtml-fragment.cxx deleted file mode 100644 index dbe0f0f..0000000 --- a/web/xhtml-fragment.cxx +++ /dev/null @@ -1,143 +0,0 @@ -// file : web/xhtml-fragment.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <web/xhtml-fragment.hxx> - -#include <string> -#include <cassert> - -#include <libstudxml/parser.hxx> -#include <libstudxml/serializer.hxx> - -#include <web/xhtml.hxx> - -using namespace std; -using namespace xml; - -namespace web -{ - namespace xhtml - { - fragment:: - fragment (const string& text, const string& name, size_t length) - { - // To parse the fragment make it a valid xml document, wrapping with the - // root element. If requested, truncate the fragment before the - // first-level element when the content length limit is exceeded. - // - string doc ("<d>" + text + "</d>"); - - parser p (doc.c_str (), - doc.size (), - name, - parser::receive_elements | - parser::receive_characters | - parser::receive_attributes_event); - - size_t len (0); - size_t level (0); - - for (parser::event_type e: p) - { - switch (e) - { - case parser::start_element: - { - truncated = length != 0 && level == 1 && len >= length; - - if (truncated) - break; - - ++level; - } - // Fall through. 
- case parser::start_attribute: - { - const auto& n (p.qname ()); - - if (!n.namespace_ ().empty ()) - throw parsing ( - name, p.line (), p.column (), "namespace is not allowed"); - - events_.emplace_back (e, n.name ()); - break; - } - case parser::end_element: - { - --level; - } - // Fall through. - case parser::end_attribute: - { - events_.emplace_back (e, ""); - break; - } - case parser::characters: - { - string& s (p.value ()); - - assert (!events_.empty ()); // Contains root element start. - - if (events_.back ().first != parser::start_attribute) - len += s.size (); - - events_.emplace_back (e, move (s)); - break; - } - default: - assert (false); - } - - if (truncated) - { - events_.emplace_back (parser::end_element, ""); // Close root. - break; - } - } - - // Unwrap the fragment removing the root element events. - // - assert (events_.size () >= 2); - events_.erase (events_.begin ()); - events_.pop_back (); - } - - void fragment:: - operator() (serializer& s) const - { - for (const auto& e: events_) - { - switch (e.first) - { - case parser::start_element: - { - s.start_element (xmlns, e.second); - break; - } - case parser::start_attribute: - { - s.start_attribute (e.second); - break; - } - case parser::end_element: - { - s.end_element (); - break; - } - case parser::end_attribute: - { - s.end_attribute (); - break; - } - case parser::characters: - { - s.characters (e.second); - break; - } - default: - assert (false); - } - } - } - } -} diff --git a/web/xhtml-fragment.hxx b/web/xhtml-fragment.hxx deleted file mode 100644 index 832d9eb..0000000 --- a/web/xhtml-fragment.hxx +++ /dev/null @@ -1,52 +0,0 @@ -// file : web/xhtml-fragment.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_XHTML_FRAGMENT_HXX -#define WEB_XHTML_FRAGMENT_HXX - -#include <string> -#include <vector> -#include <utility> // pair - -#include <libstudxml/parser.hxx> -#include <libstudxml/forward.hxx> - -namespace web -{ - namespace xhtml - { - // A parsed (via 
xml::parser) XHTML fragment that can later be serialized - // to xml::serializer. - // - class fragment - { - public: - bool truncated = false; - - public: - fragment () = default; - - // Parse string as an XHTML document fragment, truncating it if - // requested. The fragment should be complete, in the sense that all - // elements should have closing tags. Elements and attributes are - // considered to be in the namespace of the entire XHTML document, so no - // namespace should be specified for them. Do not validate against XHTML - // vocabulary. Can throw xml::parsing exception. - // - fragment (const std::string& xhtml, - const std::string& input_name, - size_t length = 0); - - void - operator() (xml::serializer&) const; - - bool - empty () const {return events_.empty ();} - - private: - std::vector<std::pair<xml::parser::event_type, std::string>> events_; - }; - } -} - -#endif // WEB_XHTML_FRAGMENT_HXX diff --git a/web/xhtml.hxx b/web/xhtml.hxx deleted file mode 100644 index 727ad5f..0000000 --- a/web/xhtml.hxx +++ /dev/null @@ -1,358 +0,0 @@ -// file : web/xhtml.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef WEB_XHTML_HXX -#define WEB_XHTML_HXX - -#include <libstudxml/serializer.hxx> - -#include <web/version.hxx> - -namespace web -{ - // "Canonical" XHTML5 vocabulary. - // - // * One-letter tag names and local variable clash problem - // - // a at|an|an anc anch - // b bt|bo|bl bld bold - // i it|it|it itl ital - // p pt|pr|pr par para - // q qt|qu|qt quo quot - // s st|st|st stk strk - // u ut|un|un unl undr - // - // Other options: - // - _a, a_, xa - // - A, I - // - x::i - // - user-defined literals: "a"_e, "/a"_e, "id"_a - // - // Things can actually get much worse, consider: - // - // int i; - // s << i << "text" << ~i; - // - // So perhaps this is the situation where the explicit namespace - // qualification (e.g., x::p) is the only robust option? 
- // - // - // * Element/attribute name clash problem (e.g., STYLE) - // - // - some attribute/element name decorator (STYLEA, STYLE_A, STYLE_) - // - rename attribute/element (e.g., STYLEDEF or CSSSTYLE[adds TYPE]); - // in case of STYLE we should probably rename the element since - // attribute will be much more frequently used. - // - "scope" attributes inside elements (P::STYLE); somewhat - // burdensome: P(P::STYLE); could then use low-case names - // for attributes - // - "scope" elements inside other elements (HEAD::STYLE); also - // burdensome. - // - // - // * Text wrapping/indentation - // - // For some (inline) elements we want additional indentation: - // - // 1. Indent content on newline (e.g., for <style>). - // 2. Automatically wrap and indent lines at (or before) certain - // length, say, 80 characters (e.g., for <p>). - // - // Would be nice to be able to implement this at the XHTML level, - // not XML. - // - namespace xhtml - { - const char* const xmlns = "http://www.w3.org/1999/xhtml"; - - struct attr_value_base - { - const char* name; - mutable const attr_value_base* next; - - virtual void - operator() (xml::serializer& s) const = 0; - - protected: - explicit - attr_value_base (const char* n): name (n), next (nullptr) {} - }; - - template <typename T> - struct attr_value: attr_value_base - { - const T* val; - - attr_value (const char* n, const T& v): attr_value_base (n), val (&v) {} - - virtual void - operator() (xml::serializer& s) const - { - s.attribute (name, *val); - if (next != nullptr) - s << *next; - } - }; - - struct element_base; - - // End tag of an element (~P). - // - struct end_element - { - const element_base* e; - - void - operator() (xml::serializer& s) const; - }; - - // Element without any conten (*BR). 
- // - struct empty_element - { - const element_base* e; - - void - operator() (xml::serializer& s) const; - }; - - struct element_base - { - virtual void - start (xml::serializer& s) const = 0; - - virtual void - end (xml::serializer& s) const = 0; - - void operator() (xml::serializer& s) const {start (s);} - end_element operator~ () const {return end_element {this};} - empty_element operator* () const {return empty_element {this};} - }; - - inline void end_element:: - operator() (xml::serializer& s) const {e->end (s);} - - inline void empty_element:: - operator() (xml::serializer& s) const {s << *e << ~*e;} - - // Element with an attribute chain, e.g., P(ID = 123, CLASS = "abc"). - // - struct attr_element: element_base - { - const element_base* e; - const attr_value_base* a; - - attr_element (const element_base& e, const attr_value_base& a) - : e (&e), a (&a) {} - - virtual void - start (xml::serializer& s) const {e->start (s); s << *a;} - - virtual void - end (xml::serializer& s) const {e->end (s);} - }; - - struct element: element_base - { - const char* name; - - explicit - element (const char* n): name (n) {} - - virtual void - start (xml::serializer& s) const {s.start_element (xmlns, name);} - - virtual void - end (xml::serializer& s) const {s.end_element (xmlns, name);} - - // s << elem(attr1 = 123, attr2 = "abc"); - // - template <typename T1> - attr_element - operator () (const attr_value<T1>& a1) const - { - return attr_element (*this, a1); - } - - template <typename T1, typename... TN> - attr_element - operator () (const attr_value<T1>& a1, const attr_value<TN>&... 
an) const - { - a1.next = operator() (an...).a; - return attr_element (*this, a1); - } - - using element_base::operator(); - }; - - struct inline_element: element - { - using element::element; - - virtual void - start (xml::serializer& s) const - { - s.suspend_indentation (); - element::start (s); - } - - virtual void - end (xml::serializer& s) const - { - element::end (s); - s.resume_indentation (); - } - }; - - struct attribute; - struct end_attribute - { - const attribute* a; - - void - operator() (xml::serializer& s) const; - }; - - struct attribute - { - const char* name; - - explicit - attribute (const char* n): name (n) {} - - // s << (attr1 = 123) << (attr2 = "abc"); - // - template <typename T> - attr_value<T> - operator= (const T& v) const {return attr_value<T> (name, v);} - - // s << attr1 (123) << attr2 ("abc"); - // - template <typename T> - attr_value<T> - operator() (const T& v) const {return attr_value<T> (name, v);} - - // s << attr1 << 123 << ~attr1 << attr2 << "abc" << ~attr2; - // - virtual void - start (xml::serializer& s) const {s.start_attribute (name);}; - - virtual void - end (xml::serializer& s) const {s.end_attribute (name);} - - void operator() (xml::serializer& s) const {start (s);} - end_attribute operator~ () const {return end_attribute {this};} - }; - - inline void end_attribute:: - operator() (xml::serializer& s) const {a->end (s);} - - // Elements. - // - // Note that they are all declared static which means we may end - // up with multiple identical copies if this header get included - // into multiple translation units. The hope here is that the - // compiler will "see-through" and eliminate all of them. 
- // - struct html_element: element - { - html_element (): element ("html") {} - - virtual void - start (xml::serializer& s) const - { - s.doctype_decl ("html"); - s.start_element (xmlns, name); - s.namespace_decl (xmlns, ""); - } - }; - static const html_element HTML; - - struct head_element: element - { - head_element (): element ("head") {} - - virtual void - start (xml::serializer& s) const - { - s.start_element (xmlns, name); - s.start_element (xmlns, "meta"); - s.attribute ("charset", "UTF-8"); - s.end_element (); - s.start_element (xmlns, "meta"); - s.attribute ("name", "viewport"); - s.attribute ("content", "device-width, initial-scale=1"); - s.end_element (); - } - }; - static const head_element HEAD; - - struct css_style_element: element - { - css_style_element (): element ("style") {} - - virtual void - start (xml::serializer& s) const - { - s.start_element (xmlns, name); - s.attribute ("type", "text/css"); - } - }; - static const css_style_element CSS_STYLE; - - static const element BODY ("body"); - static const element DATALIST ("datalist"); - static const element DIV ("div"); - static const element FORM ("form"); - static const element H1 ("h1"); - static const element H2 ("h2"); - static const element H3 ("h3"); - static const element H4 ("h4"); - static const element H5 ("h5"); - static const element H6 ("h6"); - static const element LI ("li"); - static const element LINK ("link"); - static const element META ("meta"); - static const element OPTION ("option"); - static const element P ("p"); - static const element PRE ("pre"); - static const element SCRIPT ("script"); - static const element SELECT ("select"); - static const element TABLE ("table"); - static const element TBODY ("tbody"); - static const element TD ("td"); - static const element TH ("th"); - static const element TITLE ("title"); - static const element TR ("tr"); - static const element UL ("ul"); - - static const inline_element A ("a"); - static const inline_element B ("b"); - static 
const inline_element BR ("br"); - static const inline_element CODE ("code"); - static const inline_element EM ("em"); - static const inline_element I ("i"); - static const inline_element INPUT ("input"); - static const inline_element SPAN ("span"); - static const inline_element U ("u"); - - // Attributes. - // - - static const attribute AUTOFOCUS ("autofocus"); - static const attribute CLASS ("class"); - static const attribute CONTENT ("content"); - static const attribute HREF ("href"); - static const attribute ID ("id"); - static const attribute LIST ("list"); - static const attribute NAME ("name"); - static const attribute REL ("rel"); - static const attribute PLACEHOLDER ("placeholder"); - static const attribute SELECTED ("selected"); - static const attribute STYLE ("style"); - static const attribute TYPE ("type"); - static const attribute VALUE ("value"); - } -} - -#endif // WEB_XHTML_HXX diff --git a/web/xhtml/.gitignore b/web/xhtml/.gitignore new file mode 100644 index 0000000..426db9e --- /dev/null +++ b/web/xhtml/.gitignore @@ -0,0 +1 @@ +version.hxx diff --git a/web/xhtml/buildfile b/web/xhtml/buildfile new file mode 100644 index 0000000..06dd34c --- /dev/null +++ b/web/xhtml/buildfile @@ -0,0 +1,10 @@ +# file : web/xhtml/buildfile +# license : MIT; see accompanying LICENSE file + +import libs = libstudxml%lib{studxml} + +./: {libue libus}{xhtml}: {hxx ixx txx cxx}{** -version} {hxx}{version} $libs + +hxx{version}: in{version} $src_root/manifest + +{hxx ixx txx}{*}: install = false diff --git a/web/xhtml/fragment.cxx b/web/xhtml/fragment.cxx new file mode 100644 index 0000000..843db82 --- /dev/null +++ b/web/xhtml/fragment.cxx @@ -0,0 +1,143 @@ +// file : web/xhtml/fragment.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/xhtml/fragment.hxx> + +#include <string> +#include <cassert> + +#include <libstudxml/parser.hxx> +#include <libstudxml/serializer.hxx> + +#include <web/xhtml/serialization.hxx> + +using namespace std; 
+using namespace xml; + +namespace web +{ + namespace xhtml + { + fragment:: + fragment (const string& text, const string& name, size_t length) + { + // To parse the fragment make it a valid xml document, wrapping with the + // root element. If requested, truncate the fragment before the + // first-level element when the content length limit is exceeded. + // + string doc ("<d>" + text + "</d>"); + + parser p (doc.c_str (), + doc.size (), + name, + parser::receive_elements | + parser::receive_characters | + parser::receive_attributes_event); + + size_t len (0); + size_t level (0); + + for (parser::event_type e: p) + { + switch (e) + { + case parser::start_element: + { + truncated = length != 0 && level == 1 && len >= length; + + if (truncated) + break; + + ++level; + } + // Fall through. + case parser::start_attribute: + { + const auto& n (p.qname ()); + + if (!n.namespace_ ().empty ()) + throw parsing ( + name, p.line (), p.column (), "namespace is not allowed"); + + events_.emplace_back (e, n.name ()); + break; + } + case parser::end_element: + { + --level; + } + // Fall through. + case parser::end_attribute: + { + events_.emplace_back (e, ""); + break; + } + case parser::characters: + { + string& s (p.value ()); + + assert (!events_.empty ()); // Contains root element start. + + if (events_.back ().first != parser::start_attribute) + len += s.size (); + + events_.emplace_back (e, move (s)); + break; + } + default: + assert (false); + } + + if (truncated) + { + events_.emplace_back (parser::end_element, ""); // Close root. + break; + } + } + + // Unwrap the fragment removing the root element events. 
+ // + assert (events_.size () >= 2); + events_.erase (events_.begin ()); + events_.pop_back (); + } + + void fragment:: + operator() (serializer& s) const + { + for (const auto& e: events_) + { + switch (e.first) + { + case parser::start_element: + { + s.start_element (xmlns, e.second); + break; + } + case parser::start_attribute: + { + s.start_attribute (e.second); + break; + } + case parser::end_element: + { + s.end_element (); + break; + } + case parser::end_attribute: + { + s.end_attribute (); + break; + } + case parser::characters: + { + s.characters (e.second); + break; + } + default: + assert (false); + } + } + } + } +} diff --git a/web/xhtml/fragment.hxx b/web/xhtml/fragment.hxx new file mode 100644 index 0000000..eab4335 --- /dev/null +++ b/web/xhtml/fragment.hxx @@ -0,0 +1,52 @@ +// file : web/xhtml/fragment.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_XHTML_FRAGMENT_HXX +#define WEB_XHTML_FRAGMENT_HXX + +#include <string> +#include <vector> +#include <utility> // pair + +#include <libstudxml/parser.hxx> +#include <libstudxml/forward.hxx> + +namespace web +{ + namespace xhtml + { + // A parsed (via xml::parser) XHTML fragment that can later be serialized + // to xml::serializer. + // + class fragment + { + public: + bool truncated = false; + + public: + fragment () = default; + + // Parse string as an XHTML document fragment, truncating it if + // requested. The fragment should be complete, in the sense that all + // elements should have closing tags. Elements and attributes are + // considered to be in the namespace of the entire XHTML document, so no + // namespace should be specified for them. Do not validate against XHTML + // vocabulary. Can throw xml::parsing exception. 
+ // + fragment (const std::string& xhtml, + const std::string& input_name, + size_t length = 0); + + void + operator() (xml::serializer&) const; + + bool + empty () const {return events_.empty ();} + + private: + std::vector<std::pair<xml::parser::event_type, std::string>> events_; + }; + } +} + +#endif // WEB_XHTML_FRAGMENT_HXX diff --git a/web/xhtml/serialization.hxx b/web/xhtml/serialization.hxx new file mode 100644 index 0000000..03e72ff --- /dev/null +++ b/web/xhtml/serialization.hxx @@ -0,0 +1,358 @@ +// file : web/xhtml/serialization.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_XHTML_SERIALIZATION_HXX +#define WEB_XHTML_SERIALIZATION_HXX + +#include <libstudxml/serializer.hxx> + +#include <web/xhtml/version.hxx> + +namespace web +{ + // "Canonical" XHTML5 vocabulary. + // + // * One-letter tag names and local variable clash problem + // + // a at|an|an anc anch + // b bt|bo|bl bld bold + // i it|it|it itl ital + // p pt|pr|pr par para + // q qt|qu|qt quo quot + // s st|st|st stk strk + // u ut|un|un unl undr + // + // Other options: + // - _a, a_, xa + // - A, I + // - x::i + // - user-defined literals: "a"_e, "/a"_e, "id"_a + // + // Things can actually get much worse, consider: + // + // int i; + // s << i << "text" << ~i; + // + // So perhaps this is the situation where the explicit namespace + // qualification (e.g., x::p) is the only robust option? + // + // + // * Element/attribute name clash problem (e.g., STYLE) + // + // - some attribute/element name decorator (STYLEA, STYLE_A, STYLE_) + // - rename attribute/element (e.g., STYLEDEF or CSSSTYLE[adds TYPE]); + // in case of STYLE we should probably rename the element since + // attribute will be much more frequently used. + // - "scope" attributes inside elements (P::STYLE); somewhat + // burdensome: P(P::STYLE); could then use low-case names + // for attributes + // - "scope" elements inside other elements (HEAD::STYLE); also + // burdensome. 
+ // + // + // * Text wrapping/indentation + // + // For some (inline) elements we want additional indentation: + // + // 1. Indent content on newline (e.g., for <style>). + // 2. Automatically wrap and indent lines at (or before) certain + // length, say, 80 characters (e.g., for <p>). + // + // Would be nice to be able to implement this at the XHTML level, + // not XML. + // + namespace xhtml + { + const char* const xmlns = "http://www.w3.org/1999/xhtml"; + + struct attr_value_base + { + const char* name; + mutable const attr_value_base* next; + + virtual void + operator() (xml::serializer& s) const = 0; + + protected: + explicit + attr_value_base (const char* n): name (n), next (nullptr) {} + }; + + template <typename T> + struct attr_value: attr_value_base + { + const T* val; + + attr_value (const char* n, const T& v): attr_value_base (n), val (&v) {} + + virtual void + operator() (xml::serializer& s) const + { + s.attribute (name, *val); + if (next != nullptr) + s << *next; + } + }; + + struct element_base; + + // End tag of an element (~P). + // + struct end_element + { + const element_base* e; + + void + operator() (xml::serializer& s) const; + }; + + // Element without any content (*BR). + // + struct empty_element + { + const element_base* e; + + void + operator() (xml::serializer& s) const; + }; + + struct element_base + { + virtual void + start (xml::serializer& s) const = 0; + + virtual void + end (xml::serializer& s) const = 0; + + void operator() (xml::serializer& s) const {start (s);} + end_element operator~ () const {return end_element {this};} + empty_element operator* () const {return empty_element {this};} + }; + + inline void end_element:: + operator() (xml::serializer& s) const {e->end (s);} + + inline void empty_element:: + operator() (xml::serializer& s) const {s << *e << ~*e;} + + // Element with an attribute chain, e.g., P(ID = 123, CLASS = "abc").
+ // + struct attr_element: element_base + { + const element_base* e; + const attr_value_base* a; + + attr_element (const element_base& e, const attr_value_base& a) + : e (&e), a (&a) {} + + virtual void + start (xml::serializer& s) const {e->start (s); s << *a;} + + virtual void + end (xml::serializer& s) const {e->end (s);} + }; + + struct element: element_base + { + const char* name; + + explicit + element (const char* n): name (n) {} + + virtual void + start (xml::serializer& s) const {s.start_element (xmlns, name);} + + virtual void + end (xml::serializer& s) const {s.end_element (xmlns, name);} + + // s << elem(attr1 = 123, attr2 = "abc"); + // + template <typename T1> + attr_element + operator () (const attr_value<T1>& a1) const + { + return attr_element (*this, a1); + } + + template <typename T1, typename... TN> + attr_element + operator () (const attr_value<T1>& a1, const attr_value<TN>&... an) const + { + a1.next = operator() (an...).a; + return attr_element (*this, a1); + } + + using element_base::operator(); + }; + + struct inline_element: element + { + using element::element; + + virtual void + start (xml::serializer& s) const + { + s.suspend_indentation (); + element::start (s); + } + + virtual void + end (xml::serializer& s) const + { + element::end (s); + s.resume_indentation (); + } + }; + + struct attribute; + struct end_attribute + { + const attribute* a; + + void + operator() (xml::serializer& s) const; + }; + + struct attribute + { + const char* name; + + explicit + attribute (const char* n): name (n) {} + + // s << (attr1 = 123) << (attr2 = "abc"); + // + template <typename T> + attr_value<T> + operator= (const T& v) const {return attr_value<T> (name, v);} + + // s << attr1 (123) << attr2 ("abc"); + // + template <typename T> + attr_value<T> + operator() (const T& v) const {return attr_value<T> (name, v);} + + // s << attr1 << 123 << ~attr1 << attr2 << "abc" << ~attr2; + // + virtual void + start (xml::serializer& s) const {s.start_attribute
(name);} + + virtual void + end (xml::serializer& s) const {s.end_attribute (name);} + + void operator() (xml::serializer& s) const {start (s);} + end_attribute operator~ () const {return end_attribute {this};} + }; + + inline void end_attribute:: + operator() (xml::serializer& s) const {a->end (s);} + + // Elements. + // + // Note that they are all declared static which means we may end + // up with multiple identical copies if this header gets included + // into multiple translation units. The hope here is that the + // compiler will "see-through" and eliminate all of them. + // + struct html_element: element + { + html_element (): element ("html") {} + + virtual void + start (xml::serializer& s) const + { + s.doctype_decl ("html"); + s.start_element (xmlns, name); + s.namespace_decl (xmlns, ""); + } + }; + static const html_element HTML; + + struct head_element: element + { + head_element (): element ("head") {} + + virtual void + start (xml::serializer& s) const + { + s.start_element (xmlns, name); + s.start_element (xmlns, "meta"); + s.attribute ("charset", "UTF-8"); + s.end_element (); + s.start_element (xmlns, "meta"); + s.attribute ("name", "viewport"); + s.attribute ("content", "width=device-width, initial-scale=1"); + s.end_element (); + } + }; + static const head_element HEAD; + + struct css_style_element: element + { + css_style_element (): element ("style") {} + + virtual void + start (xml::serializer& s) const + { + s.start_element (xmlns, name); + s.attribute ("type", "text/css"); + } + }; + static const css_style_element CSS_STYLE; + + static const element BODY ("body"); + static const element DATALIST ("datalist"); + static const element DIV ("div"); + static const element FORM ("form"); + static const element H1 ("h1"); + static const element H2 ("h2"); + static const element H3 ("h3"); + static const element H4 ("h4"); + static const element H5 ("h5"); + static const element H6 ("h6"); + static const element LI ("li"); + static const element LINK
("link"); + static const element META ("meta"); + static const element OPTION ("option"); + static const element P ("p"); + static const element PRE ("pre"); + static const element SCRIPT ("script"); + static const element SELECT ("select"); + static const element TABLE ("table"); + static const element TBODY ("tbody"); + static const element TD ("td"); + static const element TH ("th"); + static const element TITLE ("title"); + static const element TR ("tr"); + static const element UL ("ul"); + + static const inline_element A ("a"); + static const inline_element B ("b"); + static const inline_element BR ("br"); + static const inline_element CODE ("code"); + static const inline_element EM ("em"); + static const inline_element I ("i"); + static const inline_element INPUT ("input"); + static const inline_element SPAN ("span"); + static const inline_element U ("u"); + + // Attributes. + // + + static const attribute AUTOFOCUS ("autofocus"); + static const attribute CLASS ("class"); + static const attribute CONTENT ("content"); + static const attribute HREF ("href"); + static const attribute ID ("id"); + static const attribute LIST ("list"); + static const attribute NAME ("name"); + static const attribute REL ("rel"); + static const attribute PLACEHOLDER ("placeholder"); + static const attribute SELECTED ("selected"); + static const attribute STYLE ("style"); + static const attribute TYPE ("type"); + static const attribute VALUE ("value"); + } +} + +#endif // WEB_XHTML_SERIALIZATION_HXX diff --git a/web/xhtml/version.hxx.in b/web/xhtml/version.hxx.in new file mode 100644 index 0000000..fe3e4e5 --- /dev/null +++ b/web/xhtml/version.hxx.in @@ -0,0 +1,11 @@ +// file : web/xhtml/version.hxx.in -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_XHTML_VERSION_HXX_IN +#define WEB_XHTML_VERSION_HXX_IN + +#include <libstudxml/version.hxx> + +$libstudxml.check(LIBSTUDXML_VERSION, LIBSTUDXML_SNAPSHOT)$ + +#endif // WEB_XHTML_VERSION_HXX_IN -- cgit v1.1