#!/bin/perl
# Author: Andrey Dibrov (andry at inbox dot ru)
# Licence: GNU GPL
# Date: 2008.09.15
# SaR => Search and Replace.
#
# Perl version required: 5.6.0 or higher (for "@-"/"@+" regexp variables).
#
# Format: sar.pl [<Options>] <SearchPattern> [<ReplacePattern>] [<Flags>]
# [<RoutineProlog>] [<RoutineEpilog>]
# Script searches in standard input text signatures, matches/replaces them by
# predefined text with regexp variables (\0, \1, etc) and prints result dependent
# on options type.
# Command arguments:
# <Options>: Options, defines basic behaviour of script.
# [Optional,Fixed]
# Format: [m | s]
# m - Forces "match" behaviour, when script prints only matched text without
# any substitution.
# s - Forces "substitution" behaviour, when script prints result of
# substitution.
# If no options defined, when script chooses which type of behaviour use by
# presence of <ReplacePattern> argument. See "description".
# <SearchPattern>: Search pattern string.
# [Required,Fixed]
# <ReplacePattern>: Replace pattern string.
# [Optional,Fixed]
# Used only in "substitution" behaviour, but can be used in "match" behaviour
# when execution activated by flags 'e' or 'x'. In case of "match" behaviour
# execution of replace string internally emulated by substitution.
# <Flags>:
# [Optional,Fixed]
# Format: [i][g][m][e | x]
# i - Case flag. Case-insensitive search.
# g - Global flag. Continue search after first match/match last.
# m - Treat string as multiple lines. Enabling regexp characters - "^" and
# "$" match begin and end of each line in string, otherwise these
# characters match begin string and EOF.
# s - Treat string as single line. Enabling regexp character - "." match any
# character in string, even "carrage return" or "line feed", otherwise
# match only line characters (any character except "carrage return" and
# "line feed").
# e - Execute and substitute flag. Execute <ReplacePattern> and apply
# substitution for executed result.
# Example: ./sar.pl s '(123)' 'my $A=$1; $A++; print $A; $1;' 'ge'
# For each match, prints "124" and replace matched string by result
# of execution, e.g. by "$1". After all matches was done, prints
# input text with applied replacement(s).
# If "match" behaviour is on, then have the same behaviour as flag 'x'.
# x - Execute only flag. Execute <ReplacePattern> without substitution.
# Example: ./sar.pl m '(123)' 'my $A=$1; $A++; print $A; $1;' 'gx'
# For each match, prints "124". After all matches was done, nothing
# prints any more.
# <RoutineProlog>:
# [Optional,Fixed]
# Execution routine which executes before all match/substitution if text
# matched. Enabled only when defined flag 'e' or flag 'x'.
# <RoutineEpilog>:
# [Optional,Fixed]
# Execution routine which executes after all matches/substitutions if text
# matched. Enabled only when defined flag 'e' or flag 'x'.
# Argument legend:
# "Required" - value required.
# "Optional" - value optional.
# "Fixed" - position of value in argument list is fixed.
# Description:
# If required arguments are empty, then prints input string if "substitution"
# behaviour is on, otherwise nothing prints.
# If replace string is empty and options doesn't defined, then instead
# substitution used text match only.
# In "match" behaviour if match was successful, matched text is printed and
# returns 0, otherwise prints nothing and returns non 0.
# When "substitution" behaviour is on, script checks execution flag.
# If execution flag not defined, then script prints input text with
# applied replacements and returns 0, otherwise prints input text and returns
# non 0.
# If execution flag is defined, then script executes replace string in each
# match, after prints input text with applied replacements (only for flag 'e'),
# and returns 0, otherwise prints input text and returns non 0.
use strict;
use warnings;
my $buffer = "";
my $subBuffer;
my $charsRead = 0;
while(!$isEof) {
$charsRead =
read(STDIN,
$subBuffer,
65536);
if($charsRead < 65536) {
$isEof = 1;
}
$buffer .= $subBuffer;
}
$subBuffer = "";
}
my $optionsStr =
defined($ARGV[0]) ?
$ARGV[0] :
"";
my $matchStr =
defined($ARGV[1]) ?
$ARGV[1] :
"";
}
my $replaceStr =
defined($ARGV[2]) ?
$ARGV[2] :
"";
my $flagsStr =
defined($ARGV[3]) ?
$ARGV[3] :
"";
my $execPrologStr =
defined($ARGV[4]) ?
$ARGV[4] :
"";
my $execEpilogStr =
defined($ARGV[5]) ?
$ARGV[5] :
"";
#Use "substitution" behaviour.
my $doMatchOnly = 0;
if(index($optionsStr,
'm') !=
-1 ||
index($optionsStr,
's') ==
-1 &&
length($replaceStr) ==
0) { #Use "match" behaviour.
$doMatchOnly = 1;
}
my $rexFlags = "";
if(index($flagsStr,
'i') !=
-1) { $rexFlags .= 'i';
}
if(index($flagsStr,
'g') !=
-1) { $rexFlags .= 'g';
}
if(index($flagsStr,
'm') !=
-1) { $rexFlags .= 'm';
}
my $doMultiLine2 = 0;
if(index($flagsStr,
's') !=
-1) { $rexFlags .= 's';
}
my $doEvaluate = 0;
my $doExecuteOnly = 0;
if(index($flagsStr,
'x') !=
-1) { $rexFlags .= 'e';
$doExecuteOnly = 1;
$doEvaluate = 1;
} elsif(index($flagsStr,
'e') !=
-1) { $rexFlags .= 'e';
$doEvaluate = 1;
}
my $regexpMatched = 0;
my $regexpMatchOffset = -1;
my $regexpNextOffset = 0;
=head
Numeric variables expand function.
Returns expanded string, otherwise original string.
=cut
sub expandString#($str,@numVars,$numVarValueLimit = 256)
{
my($str,@numVars,$numVarValueLimit) = @_;
$numVarValueLimit = 256;
}
my $numeric = "0123456789";
my $isEscSeq = 0;
my $numVar = "";
my $numVarLen;
my $numVarValue;
my $numVarValueLen;
my $replaceLenDelta;
for(my $i = 0; $i <= $strLen; $i++) {
if($numVarLen > 0) {
} else {
}
}
if($char eq '\\') {
if(!$isEscSeq) {
$isEscSeq = 1;
} else {
if($numVarLen > 0) {
$isEscSeq = 0;
}
$isEscSeq = 0;
$i--;
$strLen--;
}
next;
}
my $isNum =
index($numeric,
$char);
if($isNum != -1) {
if($isEscSeq) {
$numVar .= $char;
}
} else {
if($numVarLen > 0) {
$isEscSeq = 0;
}
$isEscSeq = 0;
}
next;
SPOT_PARSENUMVAR:
if($numVar >= $numVarValueLimit) {
$numVar = "";
next;
}
$numVarValue = $numVars[$numVar];
$numVar = "";
next;
}
$numVarValueLen =
length($numVarValue);
$replaceLenDelta = $numVarValueLen-$numVarLen-1;
$str =
substr($str,
0,
$i-
$numVarLen-1).
$numVarValue.
substr($str,
$i);
$i += $replaceLenDelta-1;
$strLen += $replaceLenDelta;
$numVar = "";
}
SPOT_BREAK_LOOP:
}
=head
String match function.
Returns array of regexp variables ($0, $1, etc).
If string was matched, then result flag returned in $regexpMatched
variable, otherwise $regexpMatched would empty.
=cut
sub matchString#($str,$strMatch,$rexFlags = "")
{
my($str,$strMatch,$rexFlags) = @_;
$rexFlags = "";
}
}
my $evalFlagOffset =
index($rexFlags,
'e');
my $filteredRexFlags = $evalFlagOffset != -1 ?
substr($rexFlags,
0,
$evalFlagOffset).
substr($rexFlags,
$evalFlagOffset+1) :
$rexFlags;
my $globalFlagOffset =
index($rexFlags,
'g');
my $sysVarRegexpMatchOffset;
my $sysVarRegexpNextOffset;
if($globalFlagOffset != -1) {
$sysVarRegexpMatchOffset = '$-[$#-]';
$sysVarRegexpNextOffset = '$+[$#+]';
} else {
$sysVarRegexpMatchOffset = '$-[0]';
$sysVarRegexpNextOffset = '$+[0]';
}
my $numVar0;
my $numVar1;
my @numVars;
my $evalStr = '@numVars = ($str =~ m/$strMatch/'.$filteredRexFlags.');'."\n".
'$numVar0 = $&;'."\n".
'$numVar1 = $1;'."\n".
'$regexpMatchOffset = (defined('.$sysVarRegexpMatchOffset.') ? '.$sysVarRegexpMatchOffset.' : 0);'."\n".
'$regexpNextOffset = (defined('.$sysVarRegexpNextOffset.') ? '.$sysVarRegexpNextOffset.' : 0);'."\n";
if($#numVars == -1) {
$numVars[0] = $numVar0;
$regexpMatched = 0;
} elsif($#numVars == 0) {
$numVars[0] = $numVar0;
} else {
}
$regexpMatched = 1;
} else {
$regexpMatched = 1;
}
}
=head
Simple string substitution.
Returns result of substitution.
=cut
sub substString#($str,$toSearch,$toReplace,$rexFlags = "")
{
my($str,$toSearch,$toReplace,$rexFlags) = @_;
$rexFlags = "";
}
}
my $evalStr;
my $evalFlagOffset =
index($rexFlags,
'e');
my $globalFlagOffset =
index($rexFlags,
'g');
my $sysVarRegexpMatchOffset;
if($globalFlagOffset != -1) {
$sysVarRegexpMatchOffset = '$-[$#-]';
} else {
$sysVarRegexpMatchOffset = '$-[0]';
}
if($evalFlagOffset == -1) {
$evalStr =
'$str =~ s/$toSearch/$toReplace/'.$rexFlags.';'."\n".
'$regexpMatchOffset = (defined('.$sysVarRegexpMatchOffset.') ? '.$sysVarRegexpMatchOffset.' : 0);'."\n".
'$regexpNextOffset = length($str)-(defined($'."'".') ? length($'."'".') : 0);'."\n";
} else {
$evalStr =
'$str =~ s/$toSearch/'.$toReplace.'/'.$rexFlags.';'."\n".
'$regexpMatchOffset = (defined('.$sysVarRegexpMatchOffset.') ? '.$sysVarRegexpMatchOffset.' : 0);'."\n".
'$regexpNextOffset = length($str)-(defined($'."'".') ? length($'."'".') : 0);'."\n";
}
}
=head
Evaluate search pattern.
=cut
sub evaluateSearchPattern#($doMatchOnly,$doEvaluate,$doExecuteOnly,$str,$toSearch,$toReplace,$execProlog,$execEpilog,$rexFlags = "")
{
my($doMatchOnly,$doEvaluate,$doExecuteOnly,$str,$toSearch,$toReplace,$execProlog,$execEpilog,$rexFlags) = @_;
$rexFlags = "";
}
my $evalStr = "";
@sys::numVars = ();
my $resultStr;
my $breakSearch = 0;
my $prevStr = "";
my $nextStr = $str;
my $newStr = "";
my $expandStr;
my $expandStrLen;
my $globalFlagOffset =
index($rexFlags,
'g');
my $filteredRexFlags = $globalFlagOffset != -1 ?
substr($rexFlags,
0,
$globalFlagOffset).
substr($rexFlags,
$globalFlagOffset+1) :
$rexFlags;
if($doMatchOnly) {
@sys::numVars = matchString($nextStr,$matchStr,(!$doEvaluate ? $rexFlags : $filteredRexFlags));
if(!
defined($regexpMatched) ||
length($regexpMatched) ==
0 ||
$regexpMatched ==
0) { }
$resultStr = $sys::numVars[0];
$evalStr .= $execProlog.';'."\n";
}
$prevStr =
substr($nextStr,
0,
$regexpMatchOffset);
$nextStr =
$regexpMatchOffset <
length($nextStr) ?
substr($nextStr,
$regexpMatchOffset) :
"";
if(!$doEvaluate) {
$evalStr .= 'print($resultStr);'."\n";
}
} else {
$evalStr .= 'substString($nextStr,$toSearch,$toReplace,$rexFlags);'."\n";
}
if(!$doExecuteOnly) {
$evalStr .= 'print($resultStr);'."\n";
}
}
}
$evalStr .= $execEpilog.';'."\n";
}
} else {
@sys::numVars = matchString($nextStr,$toSearch,$filteredRexFlags);
if(!
defined($regexpMatched) ||
length($regexpMatched) ==
0 ||
$regexpMatched ==
0) { if(!$doEvaluate || !$doExecuteOnly) {
}
}
$evalStr .= $execProlog.';'."\n";
}
if(!$doEvaluate) {
$evalStr .=
'$prevStr = substr($nextStr,0,$regexpMatchOffset);'."\n".
'$nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
'$expandStr = expandString($toReplace,@sys::numVars);'."\n".
'$expandStrLen = length($expandStr);'."\n".
'$nextStr = substString($nextStr,$toSearch,$expandStr,$filteredRexFlags);'."\n".
'if(defined($nextStr) && length($nextStr) > 0) {'."\n".
' $prevStr .= substr($nextStr,0,$expandStrLen);'."\n".
' $nextStr = $expandStrLen < length($nextStr) ? substr($nextStr,$expandStrLen) : "";'."\n".
' $breakSearch = $breakSearch ? 1 : (length($nextStr) == 0 ? 1 : 0);'."\n".
' while(!$breakSearch) {'."\n".
' @sys::numVars = matchString($nextStr,$toSearch,$filteredRexFlags);'."\n".
' $breakSearch = !(defined($regexpMatched) && length($regexpMatched) != 0 && $regexpMatched != 0);'."\n".
' if(!$breakSearch) {'."\n".
' $prevStr .= substr($nextStr,0,$regexpMatchOffset);'."\n".
' $nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
' $expandStr = expandString($toReplace,@sys::numVars);'."\n".
' $expandStrLen = length($expandStr);'."\n".
' $nextStr = substString($nextStr,$toSearch,$expandStr,$filteredRexFlags);'."\n".
' if(defined($nextStr) && length($nextStr) > 0) {'."\n".
' $prevStr .= substr($nextStr,0,$expandStrLen);'."\n".
' $nextStr = $expandStrLen < length($nextStr) ? substr($nextStr,$expandStrLen) : "";'."\n".
' } else {'."\n".
' $breakSearch = 1;'."\n".
' }'."\n".
' } else {'."\n".
' $breakSearch = 1;'."\n".
' }'."\n".
' }'."\n".
'}'."\n".
'$newStr = (defined($prevStr) ? $prevStr : "").(defined($nextStr) ? $nextStr : "");'."\n".
'if(length($newStr) > 0) {'."\n".
' print($newStr);'."\n".
'} else {'."\n".
' print($str);'."\n".
'}'."\n";
} else {
$evalStr .=
'$prevStr = substr($nextStr,0,$regexpMatchOffset);'."\n".
'$nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
'$nextStr = substString($nextStr,$toSearch,$toReplace,$filteredRexFlags);'."\n".
'if(defined($nextStr) && length($nextStr) > 0) {'."\n".
' $prevStr .= substr($nextStr,0,$regexpNextOffset);'."\n".
' $nextStr = $regexpNextOffset < length($nextStr) ? substr($nextStr,$regexpNextOffset) : "";'."\n".
' $breakSearch = $breakSearch ? 1 : (length($nextStr) == 0 ? 1 : 0);'."\n".
' while(!$breakSearch) {'."\n".
' @sys::numVars = matchString($nextStr,$toSearch,$filteredRexFlags);'."\n".
' $breakSearch = !(defined($regexpMatched) && length($regexpMatched) != 0 && $regexpMatched != 0);'."\n".
' if(!$breakSearch) {'."\n".
' $prevStr .= substr($nextStr,0,$regexpMatchOffset);'."\n".
' $nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
' $nextStr = substString($nextStr,$toSearch,$toReplace,$filteredRexFlags);'."\n".
' if(defined($nextStr) && length($nextStr) > 0) {'."\n".
' $prevStr .= substr($nextStr,0,$regexpNextOffset);'."\n".
' $nextStr = $regexpNextOffset < length($nextStr) ? substr($nextStr,$regexpNextOffset) : "";'."\n".
' } else {'."\n".
' $breakSearch = 1;'."\n".
' }'."\n".
' } else {'."\n".
' $breakSearch = 1;'."\n".
' }'."\n".
' }'."\n".
'}'."\n".
'$newStr = (defined($prevStr) ? $prevStr : "").(defined($nextStr) ? $nextStr : "");'."\n".
'if(!$doExecuteOnly) {'."\n".
' if(length($newStr) > 0) {'."\n".
' print($newStr);'."\n".
' } else {'."\n".
' print($str);'."\n".
' }'."\n".
'}'."\n";
}
$evalStr = $execEpilog.';'."\n";
}
}
}
}
my $resultStr;
my $resultCode = evaluateSearchPattern($doMatchOnly,$doEvaluate,$doExecuteOnly,
$buffer,$matchStr,$replaceStr,$execPrologStr,$execEpilogStr,$rexFlags);