From 6be5bc707876ece1cd09d7c304ba559512ef5257 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 19 Apr 2017 20:48:09 +0300 Subject: Implement request body caching --- web/apache/request | 32 +++-- web/apache/request.cxx | 319 ++++++++++++++++++++++++++++++++++++++++++++----- web/apache/request.ixx | 46 +------ web/apache/stream | 4 +- web/module | 19 ++- 5 files changed, 331 insertions(+), 89 deletions(-) (limited to 'web') diff --git a/web/apache/request b/web/apache/request index 7f8cf8c..a35c5dc 100644 --- a/web/apache/request +++ b/web/apache/request @@ -50,17 +50,23 @@ namespace web writing }; + // Extends istreambuf with read limit checking, caching, etc. (see the + // implementation for details). + // + class istreambuf_cache; + class request: public web::request, public web::response, public stream_state { friend class service; - request (request_rec* rec) noexcept - : rec_ (rec) - { - rec_->status = HTTP_OK; - } + // Can not be inline/default due to the member of + // unique_ptr type. Note that istreambuf_cache type is + // incomplete. + // + request (request_rec* rec) noexcept; + ~request (); request_state state () const noexcept {return state_;} @@ -87,7 +93,7 @@ namespace web // Get request body data stream. // virtual std::istream& - content (bool buffer = false); + content (size_t limit = 0, size_t buffer = 0); // Get request parameters. // @@ -128,7 +134,10 @@ namespace web bool buffer = true); private: - // Get application/x-www-form-urlencoded form data. + // Get application/x-www-form-urlencoded form data. If request::content() + // was not called yet (and so limits are not specified) then set both of + // them to 64KB. Rewind the stream afterwards, so it's available for the + // application as well, unless no buffering were requested beforehand. // const std::string& form_data (); @@ -152,6 +161,12 @@ namespace web virtual void set_write_state () {state (request_state::writing);} + // Rewind the input stream (that must exist). Throw sequence_error if + // some unbuffered content have already been read. + // + void + rewind_istream (); + private: request_rec* rec_; request_state state_ = request_state::initial; @@ -160,7 +175,8 @@ namespace web std::unique_ptr parameters_; std::unique_ptr cookies_; std::unique_ptr form_data_; - std::unique_ptr in_buf_; + + std::unique_ptr in_buf_; std::unique_ptr in_; std::unique_ptr out_buf_; diff --git a/web/apache/request.cxx b/web/apache/request.cxx index 4e9d1fa..f69fedc 100644 --- a/web/apache/request.cxx +++ b/web/apache/request.cxx @@ -10,9 +10,10 @@ #include // request_rec, HTTP_*, OK #include // ap_*() -#include // strcasecmp() +#include // strcasecmp(), strncasecmp() #include // strftime(), time_t +#include #include #include // unique_ptr #include @@ -20,11 +21,12 @@ #include #include #include -#include // str*(), size_t +#include // str*(), memcpy(), size_t #include // move() #include // invalid_argument #include // current_exception() #include +#include // min() #include @@ -36,6 +38,211 @@ namespace web { namespace apache { + // Extend the Apache stream with checking for the read limit and caching + // the content if requested. Replay the cached content after rewind. + // + class istreambuf_cache: public istreambuf + { + enum class mode + { + cache, // Read from Apache stream, save the read data into the cache. + replay, // Read from the cache. + proxy // Read from Apache stream (don't save into the cache). + }; + + public: + istreambuf_cache (size_t read_limit, size_t cache_limit, + request_rec* r, + stream_state& s, + size_t bufsize = 1024, + size_t putback = 1) + : istreambuf (r, s, bufsize, putback), + read_limit_ (read_limit), + cache_limit_ (cache_limit) + { + } + + void + rewind () + { + // Fail if some content is already missed in the cache. + // + if (mode_ == mode::proxy) + throw sequence_error ( + string ("web::apache::istreambuf_cache::rewind: ") + + (cache_limit_ > 0 + ? "half-buffered" + : "unbuffered")); + + mode_ = mode::replay; + replay_pos_ = 0; + setg (nullptr, nullptr, nullptr); + } + + void + limits (size_t read_limit, size_t cache_limit) + { + if (read_limit > 0) + read_limit_ = read_limit; + + if (cache_limit > 0) + { + // We can not increase the cache limit if some content is already + // missed in the cache. + // + if (cache_limit > cache_limit_ && mode_ == mode::proxy) + throw sequence_error ( + "web::apache::istreambuf_cache::limits: unbuffered"); + + cache_limit_ = cache_limit; + } + } + + size_t read_limit () const noexcept {return read_limit_;} + size_t cache_limit () const noexcept {return cache_limit_;} + + private: + virtual int_type + underflow (); + + private: + // Limits + // + size_t read_limit_; + size_t cache_limit_; + + // State + // + mode mode_ = mode::cache; + size_t read_bytes_ = 0; + bool eof_ = false; // End of Apache stream is reached. + + // Cache + // + struct chunk + { + vector data; + size_t offset; + + chunk (vector&& d, size_t o): data (move (d)), offset (o) {} + + // Make the type move constructible-only to avoid copying of chunks on + // vector growth. + // + chunk (chunk&&) = default; + }; + + vector cache_; + size_t cache_size_ = 0; + size_t replay_pos_ = 0; + }; + + istreambuf_cache::int_type istreambuf_cache:: + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + if (mode_ == mode::replay) + { + if (replay_pos_ < cache_.size ()) + { + chunk& ch (cache_[replay_pos_++]); + char* p (ch.data.data ()); + setg (p, p + ch.offset, p + ch.data.size ()); + return traits_type::to_int_type (*gptr ()); + } + + // No more data to replay, so switch to the cache mode. That includes + // resetting eback, gptr and egptr, so they point into the istreambuf's + // internal buffer. Putback area should also be restored. + // + mode_ = mode::cache; + + // Bailout if the end of stream is reached. + // + if (eof_) + return traits_type::eof (); + + char* p (buf_.data () + putback_); + size_t pb (0); + + // Restore putback area if there is any cached data. Thanks to + // istreambuf, it's all in a single chunk. + // + if (!cache_.empty ()) + { + chunk& ch (cache_.back ()); + pb = min (putback_, ch.data.size ()); + memcpy (p - pb, ch.data.data () + ch.data.size () - pb, pb); + } + + setg (p - pb, p, p); + } + + // Delegate reading to the base class in the cache or proxy modes, but + // check for the read limit first. + // + if (read_limit_ && read_bytes_ >= read_limit_) + throw invalid_request (HTTP_REQUEST_ENTITY_TOO_LARGE, + "payload too large"); + + // Throws the sequence_error exception if some unbuffered content is + // already written. + // + int_type r (istreambuf::underflow ()); + + if (r == traits_type::eof ()) + { + eof_ = true; + return r; + } + + // Increment the read bytes counter. + // + size_t rb (egptr () - gptr ()); + read_bytes_ += rb; + + // In the cache mode save the read data if the cache limit is not + // reached, otherwise switch to the proxy mode. + // + if (mode_ == mode::cache) + { + // Not to complicate things we will copy the buffer into the cache + // together with the putback area, which is OK as it usually takes a + // small fraction of the buffer. By the same reason we will cache the + // whole data read even though we can exceed the limits by + // bufsize - putback - 1 bytes. + // + if (cache_size_ < cache_limit_) + { + chunk ch (vector (eback (), egptr ()), + static_cast (gptr () - eback ())); + + cache_.emplace_back (move (ch)); + cache_size_ += rb; + } + else + mode_ = mode::proxy; + } + + return r; + } + + // request + // + request:: + request (request_rec* rec) noexcept + : rec_ (rec) + { + rec_->status = HTTP_OK; + } + + request:: + ~request () + { + } + void request:: state (request_state s) { @@ -84,50 +291,57 @@ namespace web void request:: rewind () { - // @@ Request content buffering, and response cookies buffering are not - // supported yet. When done will be possible to rewind in broader - // range of cases. + // @@ Response cookies buffering is not supported yet. When done will be + // possible to rewind in broader range of cases. // + if (state_ > request_state::reading) + throw sequence_error ("web::apache::request::rewind: unbuffered"); - if (state_ == request_state::initial || + out_.reset (); + out_buf_.reset (); - // Form data have been read. Lucky case, can rewind. - // - (state_ == request_state::reading && - dynamic_cast (in_buf_.get ()) != nullptr)) - { - out_.reset (); - out_buf_.reset (); + rec_->status = HTTP_OK; - rec_->status = HTTP_OK; + ap_set_content_type (rec_, nullptr); // Unset the output content type. - ap_set_content_type (rec_, nullptr); + if (in_ != nullptr) + rewind_istream (); + } - if (in_) - in_->seekg (0); - } - else - throw sequence_error ("web::apache::request::rewind"); + void request:: + rewind_istream () + { + assert (in_buf_ != nullptr && in_ != nullptr); + + in_buf_->rewind (); // Throws if impossible to rewind. + in_->clear (); // Clears *bit flags (in particular eofbit). } istream& request:: - content (bool buffer) + content (size_t limit, size_t buffer) { - assert (!buffer); // Request content buffering is not implemented yet. - - if (!in_) + // Create the input stream/streambuf if not present, otherwise adjust the + // limits. + // + if (in_ == nullptr) { - unique_ptr in_buf (new istreambuf (rec_, *this)); + unique_ptr in_buf ( + new istreambuf_cache (limit, buffer, rec_, *this)); in_.reset (new istream (in_buf.get ())); in_buf_ = move (in_buf); in_->exceptions (istream::failbit | istream::badbit); - // Save form data now otherwise will not be available to do later - // when data already read from stream. + // Save form data now otherwise will not be available to do later when + // data is already read from stream. // form_data (); } + else + { + assert (in_buf_ != nullptr); + in_buf_->limits (limit, buffer); + } return *in_; } @@ -245,8 +459,8 @@ namespace web if (!buffer) // Request body will be discarded prior first byte of content is - // written. Save form data now to make it available for furture - // parameters () call. + // written. Save form data now to make it available for future + // parameters() call. // form_data (); @@ -360,5 +574,52 @@ namespace web n = e ? e + 1 : nullptr; } } + + const string& request:: + form_data () + { + if (!form_data_) + { + form_data_.reset (new string ()); + + if (rec_->method_number == M_POST) + { + const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); + + if (ct != nullptr && + strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0) + { + size_t limit (0); + bool rewind (true); + + // Assign some reasonable (64K) input content read/cache limits if + // not done explicitly yet (with the request::content() call). + // Rewind afterwards unless the cache limit is set to zero. + // + if (in_buf_ == nullptr) + limit = 64 * 1024; + else + rewind = in_buf_->cache_limit () > 0; + + istream& istr (content (limit, limit)); + + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (getline() failed to extract any character). + // + istream::iostate e (istr.exceptions ()); // Save exception mask. + istr.exceptions (istream::badbit); + getline (istr, *form_data_); + istr.exceptions (e); // Restore exception mask. + + // Rewind the stream unless no buffering were requested beforehand. + // + if (rewind) + rewind_istream (); + } + } + } + + return *form_data_; + } } } diff --git a/web/apache/request.ixx b/web/apache/request.ixx index 6dde6ce..4218756 100644 --- a/web/apache/request.ixx +++ b/web/apache/request.ixx @@ -2,14 +2,9 @@ // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file -#include // strncasecmp() - -#include // apr_table_* - #include // ap_*() -#include -#include // move() +#include // stringbuf namespace web { @@ -47,44 +42,5 @@ namespace web ? OK : rec_->status; } - - inline const std::string& request:: - form_data () - { - if (!form_data_) - { - form_data_.reset (new std::string ()); - - if (rec_->method_number == M_POST) - { - const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); - - if (ct != nullptr && - strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0) - { - std::istream& istr (content ()); - - // Do not throw when eofbit is set (end of stream reached), and - // when failbit is set (getline() failed to extract any - // character). - // - istr.exceptions (std::istream::badbit); - std::getline (istr, *form_data_); - - // Make this data the content of the input stream, so it's - // available for the application as well. - // - std::unique_ptr in_buf ( - new std::stringbuf (*form_data_)); - - in_.reset (new std::istream (in_buf.get ())); - in_buf_ = std::move (in_buf); - in_->exceptions (std::istream::failbit | std::istream::badbit); - } - } - } - - return *form_data_; - } } } diff --git a/web/apache/stream b/web/apache/stream index 9230d1b..d4abb4e 100644 --- a/web/apache/stream +++ b/web/apache/stream @@ -113,7 +113,7 @@ namespace web setg (p, p, p); } - private: + protected: virtual int_type underflow () { @@ -138,7 +138,7 @@ namespace web return traits_type::to_int_type (*gptr ()); } - private: + protected: size_t bufsize_; size_t putback_; std::vector buf_; diff --git a/web/module b/web/module index b770d67..1e588a4 100644 --- a/web/module +++ b/web/module @@ -11,6 +11,7 @@ #include #include #include // uint16_t +#include // size_t #include // move() #include // runtime_error @@ -111,13 +112,21 @@ namespace web virtual const name_values& cookies () = 0; - // Get the stream to read the request content from. If the buffer argument - // is false, then reading content after any unbuffered content has been - // written or after a retry is undefined behavior. The implementation may - // detect this and throw sequence_error but is not required to do so. + // Get the stream to read the request content from. If the limit argument + // is zero, then the content limit is left unchanged (unlimited initially). + // Otherwise the requested limit is set, and the invalid_request exception + // with the code 413 (payload too large) will be thrown when the specified + // limit is reached while reading from the stream. If the buffer argument + // is zero, then the buffer size is left unchanged (zero initially). If it + // is impossible to increase the buffer size (because, for example, some + // content is already read unbuffered), then the sequence_error is thrown. + // + // Note that unread input content is discarded when any unbuffered content + // is written, and any attempt to read it will result in the + // sequence_error exception being thrown. // virtual std::istream& - content (bool buffer = false) = 0; + content (size_t limit = 0, size_t buffer = 0) = 0; }; class response -- cgit v1.1