aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2017-06-21 13:05:43 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2017-06-21 13:05:43 +0300
commit f6c20ad37b2ececb446b5051837bccba93c81d7a (patch)
tree 5500b098a06a46ae21ab6f61973dd78489a6a607
parent 5e538c45eb61bf9baa09cf2ef4a9a9148e8acab0 (diff)
Move regex utilities to libbutl
-rw-r--r-- build2/buildfile 1
-rw-r--r-- build2/regex.cxx 42
-rw-r--r-- build2/regex.hxx 57
-rw-r--r-- build2/regex.txx 215
-rw-r--r-- build2/test/script/builtin.cxx 3
-rw-r--r-- build2/test/script/runner.cxx 2
6 files changed, 2 insertions, 318 deletions
diff --git a/build2/buildfile b/build2/buildfile
index 3a114e2..6d48718 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -26,7 +26,6 @@ exe{b}: \
{hxx cxx}{ operation } \
{hxx cxx}{ parser } \
{hxx cxx}{ prerequisite } \
- {hxx txx cxx}{ regex } \
{hxx cxx}{ rule } \
{hxx }{ rule-map } \
{hxx txx cxx}{ scheduler } \
diff --git a/build2/regex.cxx b/build2/regex.cxx
deleted file mode 100644
index d96b860..0000000
--- a/build2/regex.cxx
+++ /dev/null
@@ -1,42 +0,0 @@
-// file : build2/regex.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <build2/regex.hxx>
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
-# include <cstring> // strstr()
-#endif
-
-#include <ostream>
-#include <sstream>
-
-namespace std
-{
- // Currently libstdc++ just returns the name of the exception (bug #67361).
- // So we check that the description contains at least one space character.
- //
- // While VC's description is meaningful, it has an undesired prefix that
- // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
- //
- ostream&
- operator<< (ostream& o, const regex_error& e)
- {
- const char* d (e.what ()); // Implementation-defined description text.
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
- const char* rd (strstr (d, "): ")); // End of the VC prefix, if present.
- if (rd != nullptr)
- d = rd + 3; // Skip past the 3-character "): " separator.
-#endif
-
- ostringstream os;
- os << runtime_error (d); // Sanitize the description (this operator<< is declared elsewhere).
-
- string s (os.str ());
- if (s.find (' ') != string::npos) // A description without spaces is treated as meaningless.
- o << ": " << s;
-
- return o; // Nothing has been written if the description was rejected.
- }
-}
diff --git a/build2/regex.hxx b/build2/regex.hxx
deleted file mode 100644
index 1fa261b..0000000
--- a/build2/regex.hxx
+++ /dev/null
@@ -1,57 +0,0 @@
-// file : build2/regex.hxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#ifndef BUILD2_REGEX_HXX
-#define BUILD2_REGEX_HXX
-
-#include <regex>
-#include <iosfwd>
-#include <string> // basic_string
-
-#include <build2/types.hxx>
-#include <build2/utility.hxx>
-
-namespace build2
-{
- // Like std::regex_replace() but extends the standard ECMA-262
- // substitution escape sequences with a subset of Perl sequences:
- //
- // \\, \u, \l, \U, \L, \E, \1, ..., \9
- //
- // Return the resulting string together with a flag indicating whether
- // the regex matched at least once.
- //
- // Notes and limitations:
- //
- // - The only valid regex_constants flags are match_default and
- // format_first_only (format_no_copy can easily be supported).
- //
- // - If backslash doesn't start any of the listed sequences then it is
- // silently dropped and the following character is processed as usual.
- //
- // - The character case conversion is performed according to the global
- // C++ locale (which, unless changed, is the same as the C locale, and
- // both default to the POSIX locale aka "C").
- //
- template <typename C>
- pair<std::basic_string<C>, bool>
- regex_replace_ex (const std::basic_string<C>&,
- const std::basic_regex<C>&,
- const std::basic_string<C>& fmt,
- std::regex_constants::match_flag_type =
- std::regex_constants::match_default);
-}
-
-namespace std
-{
- // Print the regex error description, but only if it is meaningful (which
- // is also why the operator itself has to print the leading colon).
- //
- ostream&
- operator<< (ostream&, const regex_error&);
-}
-
-#include <build2/regex.txx>
-
-#endif // BUILD2_REGEX_HXX
diff --git a/build2/regex.txx b/build2/regex.txx
deleted file mode 100644
index 1325de9..0000000
--- a/build2/regex.txx
+++ /dev/null
@@ -1,215 +0,0 @@
-// file : build2/regex.txx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-namespace build2
-{
- template <typename C>
- pair<std::basic_string<C>, bool>
- regex_replace_ex (const std::basic_string<C>& s,
- const std::basic_regex<C>& re,
- const std::basic_string<C>& fmt,
- std::regex_constants::match_flag_type flags)
- {
- using namespace std;
-
- using string_type = basic_string<C>;
- using str_it = typename string_type::const_iterator;
- using regex_it = regex_iterator<str_it>;
-
- bool first_only ((flags & std::regex_constants::format_first_only) ==
- std::regex_constants::format_first_only);
-
- locale cl; // Copy of the global C++ locale.
- string_type r; // Result string being accumulated.
-
- // Beginning of the last unmatched substring.
- //
- str_it ub (s.begin ());
-
- regex_it b (s.begin (), s.end (), re, flags);
- regex_it e; // End-of-sequence iterator.
- bool match (b != e); // True if there is at least one match.
-
- for (regex_it i (b); i != e; ++i)
- {
- const match_results<str_it>& m (*i);
-
- // Copy the preceding unmatched substring, save the beginning of the
- // one that follows.
- //
- r.append (ub, m.prefix ().second);
- ub = m.suffix ().first;
-
- if (first_only && i != b) // Only the first match gets formatted.
- r.append (m[0].first, m[0].second); // Append matched substring.
- else
- {
- // The standard implementation calls m.format() here. We perform our
- // own formatting.
- //
- // Note that we are using char type literals with the assumption that
- // being ASCII characters they will be properly "widened" to the
- // corresponding literals of the C template parameter type.
- //
- auto digit = [] (C c) -> int
- {
- return c >= '0' && c <= '9' ? c - '0' : -1;
- };
-
- enum class case_conv {none, upper, lower, upper_once, lower_once}
- mode (case_conv::none);
-
- auto conv_chr = [&mode, &cl] (C c) -> C
- {
- switch (mode)
- {
- case case_conv::upper_once: mode = case_conv::none; // Fall through.
- case case_conv::upper: c = toupper (c, cl); break;
- case case_conv::lower_once: mode = case_conv::none; // Fall through.
- case case_conv::lower: c = tolower (c, cl); break;
- case case_conv::none: break;
- }
- return c;
- };
-
- auto append_chr = [&r, &conv_chr] (C c)
- {
- r.push_back (conv_chr (c));
- };
-
- auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
- {
- // Optimize for the common case.
- //
- if (mode == case_conv::none)
- r.append (b, e);
- else
- {
- for (str_it i (b); i != e; ++i)
- r.push_back (conv_chr (*i));
- }
- };
-
- size_t n (fmt.size ());
- for (size_t i (0); i < n; ++i)
- {
- C c (fmt[i]);
-
- switch (c)
- {
- case '$':
- {
- // Check if this is a $-based escape sequence. Interpret it
- // accordingly if that's the case, treat '$' as a regular
- // character otherwise.
- //
- c = fmt[++i]; // '\0' if last.
-
- switch (c)
- {
- case '$': append_chr (c); break;
- case '&': append_str (m[0].first, m[0].second); break;
- case '`':
- {
- append_str (m.prefix ().first, m.prefix ().second);
- break;
- }
- case '\'':
- {
- append_str (m.suffix ().first, m.suffix ().second);
- break;
- }
- default:
- {
- // Check if this is a sub-expression 1-based index ($n or
- // $nn). Append the matching substring if that's the case.
- // Treat '$' as a regular character otherwise. Index greater
- // than the sub-expression count is silently ignored.
- //
- int si (digit (c));
- if (si >= 0)
- {
- int d;
- if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last.
- {
- si = si * 10 + d;
- ++i;
- }
- }
-
- if (si > 0)
- {
- // m[0] refers to the matched substring.
- //
- if (static_cast<size_t> (si) < m.size ())
- append_str (m[si].first, m[si].second);
- }
- else
- {
- // Not a $-based escape sequence so treat '$' as a
- // regular character.
- //
- --i; // NOTE(review): for "$00" i was advanced twice, so one '0' is lost.
- append_chr ('$');
- }
-
- break;
- }
- }
-
- break;
- }
- case '\\':
- {
- c = fmt[++i]; // '\0' if last.
-
- switch (c)
- {
- case '\\': append_chr (c); break;
-
- case 'u': mode = case_conv::upper_once; break;
- case 'l': mode = case_conv::lower_once; break;
- case 'U': mode = case_conv::upper; break;
- case 'L': mode = case_conv::lower; break;
- case 'E': mode = case_conv::none; break;
- default:
- {
- // Check if this is a sub-expression 1-based index. Append
- // the matching substring if that's the case, Skip '\\'
- // otherwise. Index greater than the sub-expression count is
- // silently ignored.
- //
- int si (digit (c));
- if (si > 0)
- {
- // m[0] refers to the matched substring.
- //
- if (static_cast<size_t> (si) < m.size ())
- append_str (m[si].first, m[si].second);
- }
- else
- --i; // Backslash dropped; the next iteration handles this character.
-
- break;
- }
- }
-
- break;
- }
- default:
- {
- // Append a regular character.
- //
- append_chr (c);
- break;
- }
- }
- }
- }
- }
-
- r.append (ub, s.end ()); // Append the rightmost non-matched substring.
- return make_pair (move (r), match);
- }
-}
diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx
index 42e02d8..2a8150d 100644
--- a/build2/test/script/builtin.cxx
+++ b/build2/test/script/builtin.cxx
@@ -14,12 +14,11 @@
#include <ostream>
#include <sstream>
+#include <libbutl/regex.hxx>
#include <libbutl/path-io.hxx> // use default operator<< implementation
#include <libbutl/fdstream.hxx> // fdopen_mode, fdstream_mode
#include <libbutl/filesystem.hxx> // mkdir_status
-#include <build2/regex.hxx>
-
#include <build2/test/script/script.hxx>
// Strictly speaking a builtin which reads/writes from/to standard streams
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index 8269f05..a4ead93 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -7,9 +7,9 @@
#include <set>
#include <ios> // streamsize
+#include <libbutl/regex.hxx>
#include <libbutl/fdstream.hxx> // fdopen_mode, fdnull(), fddup()
-#include <build2/regex.hxx>
#include <build2/variable.hxx>
#include <build2/filesystem.hxx>