Get rid of C++ modules related code and rename *.mxx files to *.hxx

author: Karen Arutyunov <karen@codesynthesis.com> 2021-09-28 19:24:31 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2021-09-28 20:29:59 +0300
commit: df1ef68cd8e8582724ce1192bfc202e0b9aeaf0c (patch)
tree: b731ca4c68e60c00c7e7d499dbf4868ee7b71f44 /libbutl/utf8.mxx
parent: 7a4fc37f264cdb67f2f83fa92703c869215bbc86 (diff)
1 files changed, 0 insertions, 130 deletions
diff --git a/libbutl/utf8.mxx b/libbutl/utf8.mxx
deleted file mode 100644
index 15e8ded..0000000
--- a/libbutl/utf8.mxx
+++ /dev/null
@@ -1,130 +0,0 @@
-// file      : libbutl/utf8.mxx -*- C++ -*-
-// license   : MIT; see accompanying LICENSE file
-
-#ifndef __cpp_modules_ts
-#pragma once
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstdint> // uint8_t
-#include <utility> // pair
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.utf8;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.unicode;
-#else
-#include <libbutl/unicode.mxx>
-#endif
-
-#include <libbutl/export.hxx>
-
-LIBBUTL_MODEXPORT namespace butl
-{
-  // Here and below we will refer to bytes that encode a single Unicode
-  // codepoint as "UTF-8 byte sequence" ("UTF-8 sequence" or "byte sequence"
-  // for short) and a sequence of such sequences as "UTF-8 encoded byte
-  // string" ("byte string" for short).
-  //
-
-  // Validate a UTF-8 encoded byte string one byte at a time. Optionally, also
-  // validate that its decoded codepoints belong to the specified types or
-  // codepoint whitelist.
-  //
-  class utf8_validator
-  {
-  public:
-    // Note: the whitelist is used via shallow copy (only the pointer is kept).
-    //
-    explicit
-    utf8_validator (codepoint_types = codepoint_types::any,
-                    const char32_t* whitelist = nullptr);
-
-    // Validate the next byte returning true if it is valid (first) and
-    // whether it is the last byte of a codepoint (second). The {false, true}
-    // result indicates a byte sequence decoded into a codepoint of undesired
-    // type rather than an invalid byte that happens to be the last in the
-    // sequence (and may well be a valid starting byte of the next sequence).
-    //
-    // Note that in case the byte is invalid, calling this function again
-    // without recovery is illegal.
-    //
-    std::pair<bool, bool>
-    validate (char);
-
-    // As above but in case of an invalid byte also return the description of
-    // why it is invalid.
-    //
-    // Note that the description only contains the reason why the specified
-    // byte is not part of a valid UTF-8 sequence or the desired codepoint
-    // type, for example:
-    //
-    // "invalid UTF-8 sequence first byte (0xB0)"
-    // "invalid Unicode codepoint (reserved)"
-    //
-    // It can be used to form complete diagnostics along these lines:
-    //
-    // cerr << "invalid manifest value " << name << ": " << what << endl;
-    //
-    std::pair<bool, bool>
-    validate (char, std::string& what);
-
-    // As above but decide whether the description is needed at runtime (what
-    // may be NULL).
-    //
-    std::pair<bool, bool>
-    validate (char, std::string* what);
-
-    // Recover from an invalid byte.
-    //
-    // This function must be called with the first invalid and then subsequent
-    // bytes until it signals that the specified byte is valid. Note that it
-    // shall not be called if the sequence is decoded into a codepoint of an
-    // undesired type.
-    //
-    // Note also that a byte being invalid in the middle of a UTF-8 sequence
-    // may be valid as a first byte of the next sequence.
-    //
-    std::pair<bool, bool>
-    recover (char);
-
-    // Return the codepoint of the last byte sequence.
-    //
-    // This function can only be legally called after validate() or recover()
-    // signal that the preceding byte is valid and last.
-    //
-    char32_t
-    codepoint () const;
-
-  private:
-    codepoint_types types_;
-    const char32_t* whitelist_;
-
-    // State machine.
-    //
-    uint8_t seq_size_;      // [1 4]; calculated at the first byte validation.
-    uint8_t seq_index_ = 0; // [0 3]
-
-    // Last byte sequence decoded codepoint (built incrementally).
-    //
-    char32_t codepoint_;
-
-    // The byte range a valid UTF-8 sequence second byte must belong to as
-    // calculated during the first byte validation.
-    //
-    // Note that the subsequent (third and fourth) bytes must belong to the
-    // [80 BF] range regardless of the previous bytes.
-    //
-    std::pair<unsigned char, unsigned char> byte2_range_;
-  };
-}
-
-#include <libbutl/utf8.ixx>
author	Karen Arutyunov <karen@codesynthesis.com>	2021-09-28 19:24:31 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2021-09-28 20:29:59 +0300
commit	df1ef68cd8e8582724ce1192bfc202e0b9aeaf0c (patch)
tree	b731ca4c68e60c00c7e7d499dbf4868ee7b71f44 /libbutl/utf8.mxx
parent	7a4fc37f264cdb67f2f83fa92703c869215bbc86 (diff)