From 35359f038f571dc46de3d14af72a2bc911fb0a24 Mon Sep 17 00:00:00 2001
From: Karen Arutyunov
Date: Wed, 18 Mar 2020 22:17:49 +0300
Subject: Implement brep-monitor

---
 web/xhtml/.gitignore        |   1 +
 web/xhtml/buildfile         |  10 ++
 web/xhtml/fragment.cxx      | 143 ++++++++++++++++++
 web/xhtml/fragment.hxx      |  52 +++++++
 web/xhtml/serialization.hxx | 358 ++++++++++++++++++++++++++++++++++++++++++++
 web/xhtml/version.hxx.in    |  11 ++
 6 files changed, 575 insertions(+)
 create mode 100644 web/xhtml/.gitignore
 create mode 100644 web/xhtml/buildfile
 create mode 100644 web/xhtml/fragment.cxx
 create mode 100644 web/xhtml/fragment.hxx
 create mode 100644 web/xhtml/serialization.hxx
 create mode 100644 web/xhtml/version.hxx.in

(limited to 'web/xhtml')

diff --git a/web/xhtml/.gitignore b/web/xhtml/.gitignore
new file mode 100644
index 0000000..426db9e
--- /dev/null
+++ b/web/xhtml/.gitignore
@@ -0,0 +1 @@
+version.hxx
diff --git a/web/xhtml/buildfile b/web/xhtml/buildfile
new file mode 100644
index 0000000..06dd34c
--- /dev/null
+++ b/web/xhtml/buildfile
@@ -0,0 +1,10 @@
+# file      : web/xhtml/buildfile
+# license   : MIT; see accompanying LICENSE file
+
+import libs = libstudxml%lib{studxml}
+
+./: {libue libus}{xhtml}: {hxx ixx txx cxx}{** -version} {hxx}{version} $libs
+
+hxx{version}: in{version} $src_root/manifest
+
+{hxx ixx txx}{*}: install = false
diff --git a/web/xhtml/fragment.cxx b/web/xhtml/fragment.cxx
new file mode 100644
index 0000000..843db82
--- /dev/null
+++ b/web/xhtml/fragment.cxx
@@ -0,0 +1,143 @@
+// file      : web/xhtml/fragment.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <web/xhtml/fragment.hxx>
+
+#include <string>
+#include <cassert>
+
+#include <libstudxml/parser.hxx>
+#include <libstudxml/serializer.hxx>
+
+#include <web/xhtml/serialization.hxx>
+
+using namespace std;
+using namespace xml;
+
+namespace web
+{
+  namespace xhtml
+  {
+    fragment::
+    fragment (const string& text, const string& name, size_t length)
+    {
+      // To parse the fragment make it a valid xml document, wrapping with the
+      // root element. If requested, truncate the fragment before the
+      // first-level element when the content length limit is exceeded.
+      //
+      string doc ("<d>" + text + "</d>");
+
+      parser p (doc.c_str (),
+                doc.size (),
+                name,
+                parser::receive_elements |
+                parser::receive_characters |
+                parser::receive_attributes_event);
+
+      size_t len (0);
+      size_t level (0);
+
+      for (parser::event_type e: p)
+      {
+        switch (e)
+        {
+        case parser::start_element:
+          {
+            truncated = length != 0 && level == 1 && len >= length;
+
+            if (truncated)
+              break;
+
+            ++level;
+          }
+          // Fall through.
+        case parser::start_attribute:
+          {
+            const auto& n (p.qname ());
+
+            if (!n.namespace_ ().empty ())
+              throw parsing (
+                name, p.line (), p.column (), "namespace is not allowed");
+
+            events_.emplace_back (e, n.name ());
+            break;
+          }
+        case parser::end_element:
+          {
+            --level;
+          }
+          // Fall through.
+        case parser::end_attribute:
+          {
+            events_.emplace_back (e, "");
+            break;
+          }
+        case parser::characters:
+          {
+            string& s (p.value ());
+
+            assert (!events_.empty ()); // Contains root element start.
+
+            if (events_.back ().first != parser::start_attribute)
+              len += s.size ();
+
+            events_.emplace_back (e, move (s));
+            break;
+          }
+        default:
+          assert (false);
+        }
+
+        if (truncated)
+        {
+          events_.emplace_back (parser::end_element, ""); // Close root.
+          break;
+        }
+      }
+
+      // Unwrap the fragment removing the root element events.
+      //
+      assert (events_.size () >= 2);
+      events_.erase (events_.begin ());
+      events_.pop_back ();
+    }
+
+    void fragment::
+    operator() (serializer& s) const
+    {
+      for (const auto& e: events_)
+      {
+        switch (e.first)
+        {
+        case parser::start_element:
+          {
+            s.start_element (xmlns, e.second);
+            break;
+          }
+        case parser::start_attribute:
+          {
+            s.start_attribute (e.second);
+            break;
+          }
+        case parser::end_element:
+          {
+            s.end_element ();
+            break;
+          }
+        case parser::end_attribute:
+          {
+            s.end_attribute ();
+            break;
+          }
+        case parser::characters:
+          {
+            s.characters (e.second);
+            break;
+          }
+        default:
+          assert (false);
+        }
+      }
+    }
+  }
+}
diff --git a/web/xhtml/fragment.hxx b/web/xhtml/fragment.hxx
new file mode 100644
index 0000000..eab4335
--- /dev/null
+++ b/web/xhtml/fragment.hxx
@@ -0,0 +1,52 @@
+// file      : web/xhtml/fragment.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef WEB_XHTML_FRAGMENT_HXX
+#define WEB_XHTML_FRAGMENT_HXX
+
+#include <string>
+#include <vector>
+#include <utility> // pair
+
+#include <libstudxml/parser.hxx>
+#include <libstudxml/forward.hxx>
+
+namespace web
+{
+  namespace xhtml
+  {
+    // A parsed (via xml::parser) XHTML fragment that can later be serialized
+    // to xml::serializer.
+    //
+    class fragment
+    {
+    public:
+      bool truncated = false;
+
+    public:
+      fragment () = default;
+
+      // Parse string as an XHTML document fragment, truncating it if
+      // requested. The fragment should be complete, in the sense that all
+      // elements should have closing tags. Elements and attributes are
+      // considered to be in the namespace of the entire XHTML document, so no
+      // namespace should be specified for them. Do not validate against XHTML
+      // vocabulary. Can throw xml::parsing exception.
+      //
+      fragment (const std::string& xhtml,
+                const std::string& input_name,
+                size_t length = 0);
+
+      void
+      operator() (xml::serializer&) const;
+
+      bool
+      empty () const {return events_.empty ();}
+
+    private:
+      std::vector<std::pair<xml::parser::event_type, std::string>> events_;
+    };
+  }
+}
+
+#endif // WEB_XHTML_FRAGMENT_HXX
diff --git a/web/xhtml/serialization.hxx b/web/xhtml/serialization.hxx
new file mode 100644
index 0000000..03e72ff
--- /dev/null
+++ b/web/xhtml/serialization.hxx
@@ -0,0 +1,358 @@
+// file      : web/xhtml/serialization.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef WEB_XHTML_SERIALIZATION_HXX
+#define WEB_XHTML_SERIALIZATION_HXX
+
+#include <libstudxml/serializer.hxx>
+
+#include <web/xhtml/fragment.hxx>
+
+namespace web
+{
+  // "Canonical" XHTML5 vocabulary.
+  //
+  // * One-letter tag names and local variable clash problem
+  //
+  // a at|an|an  anc anch
+  // b bt|bo|bl  bld bold
+  // i it|it|it  itl ital
+  // p pt|pr|pr  par para
+  // q qt|qu|qt  quo quot
+  // s st|st|st  stk strk
+  // u ut|un|un  unl undr
+  //
+  // Other options:
+  //   - _a, a_, xa
+  //   - A, I
+  //   - x::i
+  //   - user-defined literals: "a"_e, "/a"_e, "id"_a
+  //
+  // Things can actually get much worse, consider:
+  //
+  // int i;
+  // s << i << "text" << ~i;
+  //
+  // So perhaps this is the situation where the explicit namespace
+  // qualification (e.g., x::p) is the only robust option?
+  //
+  //
+  // * Element/attribute name clash problem (e.g., STYLE)
+  //
+  //   - some attribute/element name decorator (STYLEA, STYLE_A, STYLE_)
+  //   - rename attribute/element (e.g., STYLEDEF or CSSSTYLE[adds TYPE]);
+  //     in case of STYLE we should probably rename the element since
+  //     attribute will be much more frequently used.
+  //   - "scope" attributes inside elements (P::STYLE); somewhat
+  //     burdensome: P(P::STYLE); could then use low-case names
+  //     for attributes
+  //   - "scope" elements inside other elements (HEAD::STYLE); also
+  //     burdensome.
+ // + // + // * Text wrapping/indentation + // + // For some (inline) elements we want additional indentation: + // + // 1. Indent content on newline (e.g., for