about summary refs log tree commit diff
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2020-11-19 18:30:11 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2020-11-25 14:52:00 +0300
commit 18de2b7b5b1b063e7a074878b4e3b3ccdd5c6ebc (patch)
tree 36c60d9dd494b1b4cb714fea9900899362273e2f
parent ae43c5780651d594b1ec76e99330cd6ef082b0c5 (diff)
Allow multiple -e options for sed builtin
-rw-r--r-- libbutl/builtin.cxx 128
-rw-r--r-- tests/builtin/sed.testscript 55
2 files changed, 120 insertions, 63 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index 7a2c024..5c1c875 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -1569,7 +1569,7 @@ namespace butl
return 1;
}
- // sed [-n|--quiet] [-i|--in-place] -e|--expression <script> [<file>]
+ // sed [-n|--quiet] [-i|--in-place] (-e|--expression <script>)... [<file>]
//
// Note: must be executed asynchronously.
//
@@ -1615,71 +1615,95 @@ namespace butl
if (ops.expression ().empty ())
fail () << "missing script";
- // Only a single script is supported.
- //
- if (ops.expression ().size () != 1)
- fail () << "multiple scripts";
-
- struct
+ struct subst
{
- string regex;
+ std::regex regex;
string replacement;
- bool icase = false;
- bool global = false;
- bool print = false;
- } subst;
+ bool global;
+ bool print;
+
+ subst (const string& re, bool ic, string rp, bool gl, bool pr)
+ //
+ // Note that ECMAScript is implied if no grammar flag is specified.
+ //
+ : regex (re, ic ? regex::icase : regex::ECMAScript),
+ replacement (move (rp)),
+ global (gl),
+ print (pr) {}
+ };
+
+ small_vector<subst, 1> substs;
+ for (const string& v: ops.expression ())
{
- const string& v (ops.expression ()[0]);
if (v.empty ())
fail () << "empty script";
if (v[0] != 's')
- fail () << "only 's' command supported";
+ fail () << "unknown command in '" << v << "': only 's' command "
+ << "supported";
// Parse the substitute command.
//
if (v.size () < 2)
- fail () << "no delimiter for 's' command";
+ fail () << "no delimiter for 's' command in '" << v << "'";
char delim (v[1]);
if (delim == '\\' || delim == '\n')
- fail () << "invalid delimiter for 's' command";
+ fail () << "invalid delimiter for 's' command in '" << v << "'";
size_t p (v.find (delim, 2));
if (p == string::npos)
- fail () << "unterminated 's' command regex";
+ fail () << "unterminated 's' command regex in '" << v << "'";
- subst.regex.assign (v, 2, p - 2);
+ string regex (v, 2, p - 2);
// Empty regex matches nothing, so not of much use.
//
- if (subst.regex.empty ())
- fail () << "empty regex in 's' command";
+ if (regex.empty ())
+ fail () << "empty regex in 's' command in '" << v << "'";
size_t b (p + 1);
p = v.find (delim, b);
if (p == string::npos)
- fail () << "unterminated 's' command replacement";
+ fail () << "unterminated 's' command replacement in '" << v << "'";
- subst.replacement.assign (v, b, p - b);
+ string replacement (v, b, p - b);
// Parse the substitute command flags.
//
+ bool icase (false);
+ bool global (false);
+ bool print (false);
+
char c;
for (++p; (c = v[p]) != '\0'; ++p)
{
switch (c)
{
- case 'i': subst.icase = true; break;
- case 'g': subst.global = true; break;
- case 'p': subst.print = true; break;
+ case 'i': icase = true; break;
+ case 'g': global = true; break;
+ case 'p': print = true; break;
default:
{
- fail () << "invalid 's' command flag '" << c << "'";
+ fail () << "invalid 's' command flag '" << c << "' in '" << v
+ << "'";
}
}
}
+
+ try
+ {
+ substs.emplace_back (regex, icase,
+ move (replacement),
+ global, print);
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful (no space).
+ //
+ fail () << "invalid regex '" << regex << "' in '" << v << "'" << e;
+ }
}
// Path of a file to edit. An empty path represents stdin.
@@ -1738,10 +1762,6 @@ namespace butl
rm = auto_rmfile (tp);
}
- // Note that ECMAScript is implied if no grammar flag is specified.
- //
- regex re (subst.regex, subst.icase ? regex::icase : regex::ECMAScript);
-
// Edit a file or STDIN.
//
try
@@ -1756,22 +1776,42 @@ namespace butl
// Read until failbit is set (throw on badbit).
//
- string s;
- while (getline (cin, s))
+ string ps;
+ while (getline (cin, ps))
{
- auto r (regex_replace_search (
- s,
- re,
- subst.replacement,
- subst.global
- ? regex_constants::format_default
- : regex_constants::format_first_only));
+ bool prn (!ops.quiet ());
+
+ for (const subst& s: substs)
+ {
+ auto r (regex_replace_search (
+ ps,
+ s.regex,
+ s.replacement,
+ s.global
+ ? regex_constants::format_default
+ : regex_constants::format_first_only));
+
+ // If the regex matches, then replace the pattern space with the
+ // substitution result. If the 'p' flag is specified, then also
+ // print it and start the next cycle, skipping remaining scripts.
+ //
+ if (r.second)
+ {
+ ps = move (r.first);
+
+ if (s.print)
+ {
+ prn = true;
+ break;
+ }
+ }
+ }
// Add newline regardless whether the source line is newline-
// terminated or not (in accordance with POSIX).
//
- if (!ops.quiet () || (r.second && subst.print))
- cout << r.first << '\n';
+ if (prn)
+ cout << ps << '\n';
}
cin.close ();
@@ -1801,12 +1841,6 @@ namespace butl
d << ": " << e;
}
}
- catch (const regex_error& e)
- {
- // Print regex_error description if meaningful (no space).
- //
- error () << "invalid regex" << e;
- }
// Can be thrown while creating cin, cout or writing to cerr.
//
catch (const io_error& e)
diff --git a/tests/builtin/sed.testscript b/tests/builtin/sed.testscript
index ad26483..7fbc9b2 100644
--- a/tests/builtin/sed.testscript
+++ b/tests/builtin/sed.testscript
@@ -89,16 +89,10 @@ test.options += -c
sed: empty script
EOE
- : multiple
- :
- $* -e 's/a//' -e 's/a//' 2>>EOE != 0
- sed: multiple scripts
- EOE
-
: invalid
:
$* -e 'z' 2>>EOE != 0
- sed: only 's' command supported
+ sed: unknown command in 'z': only 's' command supported
EOE
}
@@ -156,13 +150,13 @@ test.options += -c
: none
:
$* -e 's' 2>>EOE != 0
- sed: no delimiter for 's' command
+ sed: no delimiter for 's' command in 's'
EOE
: invalid
:
$* -e 's\\' 2>>EOE != 0
- sed: invalid delimiter for 's' command
+ sed: invalid delimiter for 's' command in 's\\'
EOE
}
@@ -171,14 +165,14 @@ test.options += -c
{
: unterminated
:
- $* -e 's/foo' 2>>/EOE != 0
- sed: unterminated 's' command regex
+ $* -e 's/foo' 2>>EOE != 0
+ sed: unterminated 's' command regex in 's/foo'
EOE
: empty
:
$* -e 's///' 2>>EOE != 0
- sed: empty regex in 's' command
+ sed: empty regex in 's' command in 's///'
EOE
: invalid
@@ -187,20 +181,20 @@ test.options += -c
: regex errors. For example '*' is parsed successfully.
:
$* -e 's/foo[/bar/' 2>>~%EOE% != 0
- %sed: invalid regex.*%
+ %sed: invalid regex 'foo\[' in 's/foo\[/bar/'.*%
EOE
}
: unterminated-replacement
:
- $* -e 's/foo/bar' 2>>/EOE != 0
- sed: unterminated 's' command replacement
+ $* -e 's/foo/bar' 2>>EOE != 0
+ sed: unterminated 's' command replacement in 's/foo/bar'
EOE
: invalid-flags
:
$* -e 's/foo/bar/a' 2>>EOE != 0
- sed: invalid 's' command flag 'a'
+ sed: invalid 's' command flag 'a' in 's/foo/bar/a'
EOE
}
@@ -314,6 +308,35 @@ test.options += -c
}
}
}
+
+ : multiple
+ :
+ {
+ $* -e 's/b/x/' -e 's/x/y/' -e 's/c/z/' <'abc' >'ayz' : replace-replacement
+
+ : new-cycle
+ :
+ $* -e 's/b/x/p' -e 's/x/y/p' <<EOI >>EOO
+ abc
+ klm
+ dxe
+ EOI
+ axc
+ klm
+ dye
+ EOO
+
+ : quiet
+ :
+ $* -n -e 's/b/x/p' -e 's/x/y/p' <<EOI >>EOO
+ abc
+ klm
+ dxe
+ EOI
+ axc
+ dye
+ EOO
+ }
}
: in-place