Задача: Утилиты
Исходник: полноценный поиск и замена под unix, язык: perl [code #575, hits: 7451]
автор: - [добавлен: 12.01.2009]
  1. #!/bin/perl
  2.  
  3. # Author: Andrey Dibrov (andry at inbox dot ru)
  4. # Licence: GNU GPL
  5. # Date: 2008.09.15
  6.  
  7. # SaR => Search and Replace.
  8. #
  9. # Perl version required: 5.6.0 or higher (for "@-"/"@+" regexp variables).
  10. #
  11. # Format: sar.pl [<Options>] <SearchPattern> [<ReplacePattern>] [<Flags>]
  12. # [<RoutineProlog>] [<RoutineEpilog>]
  13. # Script searches in standard input text signatures, matches/replaces them by
  14. # predefined text with regexp variables (\0, \1, etc) and prints result dependent
  15. # on options type.
  16. # Command arguments:
  17. # <Options>: Options, defines basic behaviour of script.
  18. # [Optional,Fixed]
  19. # Format: [m | s]
  20. # m - Forces "match" behaviour, when script prints only matched text without
  21. # any substitution.
  22. # s - Forces "substitution" behaviour, when script prints result of
  23. # substitution.
  24. # If no options are defined, then the script chooses which type of behaviour to
  25. # use by the presence of the <ReplacePattern> argument. See "description".
  26. # <SearchPattern>: Search pattern string.
  27. # [Required,Fixed]
  28. # <ReplacePattern>: Replace pattern string.
  29. # [Optional,Fixed]
  30. # Used only in "substitution" behaviour, but can be used in "match" behaviour
  31. # when execution activated by flags 'e' or 'x'. In case of "match" behaviour
  32. # execution of replace string internally emulated by substitution.
  33. # <Flags>:
  34. # [Optional,Fixed]
  35. # Format: [i][g][m][s][e | x]
  36. # i - Case flag. Case-insensitive search.
  37. # g - Global flag. Continue search after first match/match last.
  38. # m - Treat string as multiple lines. Enabling regexp characters - "^" and
  39. # "$" match begin and end of each line in string, otherwise these
  40. # characters match begin string and EOF.
  41. # s - Treat string as single line. Enabling regexp character - "." match any
  42. # character in string, even "carriage return" or "line feed", otherwise
  43. # match only line characters (any character except "carriage return" and
  44. # "line feed").
  45. # e - Execute and substitute flag. Execute <ReplacePattern> and apply
  46. # substitution for executed result.
  47. # Example: ./sar.pl s '(123)' 'my $A=$1; $A++; print $A; $1;' 'ge'
  48. # For each match, prints "124" and replace matched string by result
  49. # of execution, e.g. by "$1". After all matches was done, prints
  50. # input text with applied replacement(s).
  51. # If "match" behaviour is on, then have the same behaviour as flag 'x'.
  52. # x - Execute only flag. Execute <ReplacePattern> without substitution.
  53. # Example: ./sar.pl m '(123)' 'my $A=$1; $A++; print $A; $1;' 'gx'
  54. # For each match, prints "124". After all matches was done, nothing
  55. # prints any more.
  56. # <RoutineProlog>:
  57. # [Optional,Fixed]
  58. # Execution routine which executes before all match/substitution if text
  59. # matched. Enabled only when defined flag 'e' or flag 'x'.
  60. # <RoutineEpilog>:
  61. # [Optional,Fixed]
  62. # Execution routine which executes after all matches/substitutions if text
  63. # matched. Enabled only when defined flag 'e' or flag 'x'.
  64. # Argument legend:
  65. # "Required" - value required.
  66. # "Optional" - value optional.
  67. # "Fixed" - position of value in argument list is fixed.
  68. # Description:
  69. # If required arguments are empty, then prints input string if "substitution"
  70. # behaviour is on, otherwise nothing prints.
  71. # If the replace string is empty and no options are defined, then text match
  72. # only is used instead of substitution.
  73. # In "match" behaviour if match was successful, matched text is printed and
  74. # returns 0, otherwise prints nothing and returns non 0.
  75. # When "substitution" behaviour is on, script checks execution flag.
  76. # If execution flag not defined, then script prints input text with
  77. # applied replacements and returns 0, otherwise prints input text and returns
  78. # non 0.
  79. # If execution flag is defined, then script executes replace string in each
  80. # match, after prints input text with applied replacements (only for flag 'e'),
  81. # and returns 0, otherwise prints input text and returns non 0.
  82.  
  83. use strict;
  84. use warnings;
  85.  
  # Slurp the whole of standard input into $buffer, 64 KiB per read() call.
  # A chunk shorter than the requested size is treated as the end of input.
  my $buffer = "";
  my $subBuffer;
  my $isEof = eof(STDIN);
  my $charsRead = 0;
  until($isEof) {
      $charsRead = read(STDIN, $subBuffer, 65536);
      $isEof = 1 if $charsRead < 65536;
      $buffer .= $subBuffer;
  }
  $subBuffer = "";

  # No input text at all: fail without printing anything.
  exit 1 unless defined($buffer) && length($buffer) != 0;
  102.  
  # Positional command line arguments; a missing argument becomes "".
  my($optionsStr,$matchStr,$replaceStr,$flagsStr,$execPrologStr,$execEpilogStr) =
      map { defined($ARGV[$_]) ? $ARGV[$_] : "" } 0..5;

  # Without a search pattern there is nothing to do: echo the input and fail.
  if($matchStr eq "") {
      print($buffer);
      exit 1;
  }

  # "match" behaviour is used when option 'm' is given, or when option 's' is
  # absent and the replace pattern is empty; otherwise "substitution" is used.
  my $doMatchOnly =
      (index($optionsStr,'m') >= 0 ||
       (index($optionsStr,'s') < 0 && $replaceStr eq "")) ? 1 : 0;

  # Regexp flags requested by the user, always collected in the order i, g, m, s.
  my $rexFlags = join("", grep { index($flagsStr,$_) != -1 } qw(i g m s));
  my $doMultiLine2 = 0;

  # 'x' (execute only) implies evaluation; 'e' alone evaluates and substitutes.
  # Both are passed to the regexp engine as the 'e' flag.
  my $doExecuteOnly = index($flagsStr,'x') != -1 ? 1 : 0;
  my $doEvaluate = ($doExecuteOnly || index($flagsStr,'e') != -1) ? 1 : 0;
  $rexFlags .= 'e' if $doEvaluate;

  # Shared state written by matchString()/substString() on every call.
  my $regexpMatched = 0;
  my $regexpMatchOffset = -1;
  my $regexpNextOffset = 0;
  152.  
  # Numeric variables expand function.
  #
  # Scans $str from left to right and
  #   - replaces every back-reference "\N" (N is a run of decimal digits) by
  #     $numVars[N], provided N is below the limit (256) and $numVars[N] is
  #     defined; otherwise the reference is left in the text unchanged;
  #   - collapses an escaped backslash "\\" into a single "\".
  # Any other character, including a backslash followed by something else, is
  # copied as is. Inserted values are never rescanned.
  #
  # Arguments:
  #   $str     - text containing the references.
  #   @numVars - values for \0, \1, ... (all remaining arguments).
  # Because @numVars takes every remaining argument, the limit shown in the
  # signature comment cannot be supplied by a caller and is always 256.
  #
  # Returns the expanded string (undef when $str is undef).
  sub expandString#($str,@numVars,$numVarValueLimit = 256)
  {
      my($str,@numVars) = @_;
      my $numVarValueLimit = 256;

      return $str if(!defined($str));

      # One pass handles both token kinds. An unresolvable reference is put
      # back verbatim and scanning continues right after it, so a following
      # "\M" or "\\" is still processed.
      $str =~ s{\\(?:(\\)|([0-9]+))}{
          defined($1) ? '\\'
          : ($2 < $numVarValueLimit && defined($numVars[$2])) ? $numVars[$2]
          : '\\'.$2
      }ge;

      return $str;
  }
  231.  
  # String match function.
  #
  # Matches $str against the pattern $strMatch using the regexp flags in
  # $rexFlags (an 'e' flag, meaningless for a match, is removed first).
  #
  # Returns the array of regexp variables: element 0 is the matched text ($&),
  # followed by the captured values. With the 'g' flag the match is done in
  # list context, so the captured values of all matches follow and element 0
  # is the text of the last match. For an undefined or empty $str a list
  # holding one empty string is returned.
  #
  # Side effects on file-level variables:
  #   $regexpMatched     - 1 if the pattern matched, 0 otherwise.
  #   $regexpMatchOffset - start offset taken from @- (0 if not defined).
  #   $regexpNextOffset  - end offset taken from @+ (0 if not defined).
  sub matchString#($str,$strMatch,$rexFlags = "")
  {
      my($str,$strMatch,$rexFlags) = @_;
      if(!defined($rexFlags)) {
          $rexFlags = "";
      }

      if(!defined($str) || length($str) == 0) {
          # Nothing can match here; do not leave the flag of a previous
          # successful call behind, callers test it right after this call.
          $regexpMatched = 0;
          return "";
      }

      # Plain index()/substr() on purpose: running another regexp in this
      # scope would leak its $& into the result when the eval'ed match fails.
      my $evalFlagOffset = index($rexFlags,'e');
      my $filteredRexFlags = $evalFlagOffset != -1 ?
          substr($rexFlags,0,$evalFlagOffset).substr($rexFlags,$evalFlagOffset+1) : $rexFlags;

      my $globalFlagOffset = index($rexFlags,'g');
      my $sysVarRegexpMatchOffset;
      my $sysVarRegexpNextOffset;
      if($globalFlagOffset != -1) {
          $sysVarRegexpMatchOffset = '$-[$#-]';
          $sysVarRegexpNextOffset = '$+[$#+]';
      } else {
          $sysVarRegexpMatchOffset = '$-[0]';
          $sysVarRegexpNextOffset = '$+[0]';
      }

      # The flags are only known at run time, so the match statement is built
      # as text. Only the flag letters are spliced into the code; the pattern
      # itself is interpolated by the regexp engine.
      my $numVar0;
      my $numVar1;
      my @numVars;
      my $evalStr = '@numVars = ($str =~ m/$strMatch/'.$filteredRexFlags.');'."\n".
          '$numVar0 = $&;'."\n".
          '$numVar1 = $1;'."\n".
          '$regexpMatchOffset = (defined('.$sysVarRegexpMatchOffset.') ? '.$sysVarRegexpMatchOffset.' : 0);'."\n".
          '$regexpNextOffset = (defined('.$sysVarRegexpNextOffset.') ? '.$sysVarRegexpNextOffset.' : 0);'."\n";
      eval($evalStr);
      if($@) {
          # E.g. an invalid search pattern; it is then treated as "no match".
          warn("matchString: ".$@);
      }

      if($#numVars == -1) {
          # No match.
          $numVars[0] = $numVar0;
          $regexpMatched = 0;
      } elsif($#numVars == 0) {
          if(!defined($numVar1)) {
              # Pattern without capture groups: the single element is not a
              # captured value, replace it by the matched text.
              $numVars[0] = $numVar0;
          } else {
              unshift(@numVars,$numVar0);
          }
          $regexpMatched = 1;
      } else {
          unshift(@numVars,$numVar0);
          $regexpMatched = 1;
      }

      return @numVars;
  }
  291.  
  # Simple string substitution.
  #
  # Applies s/$toSearch/$toReplace/ with the regexp flags in $rexFlags to $str.
  # Without the 'e' flag $toReplace is inserted as literal text. With the 'e'
  # flag $toReplace is Perl code supplied by the user; it is spliced into the
  # generated s/// statement and executed for every match.
  # NOTE: because the code is spliced between "/" delimiters, an unescaped "/"
  # inside it breaks the generated statement; such a failure is reported on
  # STDERR and $str is returned unchanged.
  #
  # Returns the result of the substitution ("" for an undefined or empty $str).
  #
  # Side effects on file-level variables:
  #   $regexpMatchOffset - start offset taken from @- (0 if not defined).
  #   $regexpNextOffset  - length of the result minus the length of the text
  #                        following the match ($').
  sub substString#($str,$toSearch,$toReplace,$rexFlags = "")
  {
      my($str,$toSearch,$toReplace,$rexFlags) = @_;
      if(!defined($rexFlags)) {
          $rexFlags = "";
      }

      if(!defined($str) || length($str) == 0) {
          return "";
      }

      my $sysVarRegexpMatchOffset = index($rexFlags,'g') != -1 ? '$-[$#-]' : '$-[0]';

      # With 'e' the replacement code itself becomes part of the statement,
      # otherwise the statement only refers to the variable holding the text.
      my $replacement = index($rexFlags,'e') != -1 ? $toReplace : '$toReplace';

      my $evalStr =
          '$str =~ s/$toSearch/'.$replacement.'/'.$rexFlags.';'."\n".
          '$regexpMatchOffset = (defined('.$sysVarRegexpMatchOffset.') ? '.$sysVarRegexpMatchOffset.' : 0);'."\n".
          '$regexpNextOffset = length($str)-(defined($'."'".') ? length($'."'".') : 0);'."\n";

      eval($evalStr);
      if($@) {
          # Invalid pattern or broken replacement code: say so instead of
          # silently returning the text unchanged.
          warn("substString: ".$@);
      }

      return $str;
  }
  334.  
  # Evaluate search pattern.
  #
  # Drives the whole match / substitution run over $str and prints the result
  # to STDOUT.
  #
  # Arguments:
  #   $doMatchOnly   - true: "match" behaviour, print only the matched text;
  #                    false: "substitution" behaviour, print the changed text.
  #   $doEvaluate    - true: $toReplace is Perl code executed for each match.
  #   $doExecuteOnly - true: execute $toReplace only, do not print the text.
  #   $str           - input text.
  #   $toSearch      - search pattern.
  #   $toReplace     - replacement text (with \0, \1, ... references) or code.
  #   $execProlog    - code executed before, $execEpilog - code executed after
  #                    the match/substitution, both only if the text matched.
  #   $rexFlags      - regexp flags (i, g, m, s, e).
  #
  # Returns 0 if the pattern matched, 2 otherwise. When nothing matched in
  # "substitution" behaviour the input text is printed unchanged (unless
  # execute-only is requested).
  #
  # The work is assembled as Perl text and run by a single eval, so that the
  # user supplied prolog and epilog share one scope with it.
  # NOTE: prolog, epilog and (with flag e/x) the replace pattern are executed
  # as Perl code with the privileges of the caller; this is the documented
  # purpose of these arguments.
  sub evaluateSearchPattern#($doMatchOnly,$doEvaluate,$doExecuteOnly,$str,$toSearch,$toReplace,$execProlog,$execEpilog,$rexFlags = "")
  {
      my($doMatchOnly,$doEvaluate,$doExecuteOnly,$str,$toSearch,$toReplace,$execProlog,$execEpilog,$rexFlags) = @_;
      if(!defined($rexFlags)) {
          $rexFlags = "";
      }

      my $evalStr = "";
      @sys::numVars = ();

      my $resultStr;

      # The 'g' flag is implemented by the search loop below, not by the regexp
      # engine, so it is removed from the flags handed to the helpers.
      my $globalFlagOffset = index($rexFlags,'g');
      my $filteredRexFlags = $globalFlagOffset != -1 ?
          substr($rexFlags,0,$globalFlagOffset).substr($rexFlags,$globalFlagOffset+1) : $rexFlags;

      # The variables below look unused here but are referenced by name from
      # the generated code; do not remove or rename them.
      # Without the 'g' flag the search stops after the first substitution.
      my $breakSearch = $globalFlagOffset != -1 ? 0 : 1;
      my $prevStr = "";
      my $nextStr = $str;
      my $newStr = "";
      my $expandStr;
      my $expandStrLen;

      if($doMatchOnly) {
          # Use the pattern passed by the caller, not a file-level variable.
          @sys::numVars = matchString($nextStr,$toSearch,(!$doEvaluate ? $rexFlags : $filteredRexFlags));
          if(!defined($regexpMatched) || length($regexpMatched) == 0 || $regexpMatched == 0) {
              return 2;
          }
          $resultStr = $sys::numVars[0];
          if(defined($execProlog) && length($execProlog) > 0) {
              $evalStr .= $execProlog.';'."\n";
          }
          $prevStr = substr($nextStr,0,$regexpMatchOffset);
          $nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";
          if(!$doEvaluate) {
              if(defined($resultStr) && length($resultStr) > 0) {
                  $evalStr .= 'print($resultStr);'."\n";
              }
          } else {
              # Execution of the replace code is emulated by a substitution
              # whose result is thrown away.
              if(length($nextStr) > 0) {
                  $evalStr .= 'substString($nextStr,$toSearch,$toReplace,$rexFlags);'."\n";
              }
              if(!$doExecuteOnly) {
                  if(defined($resultStr) && length($resultStr) > 0) {
                      $evalStr .= 'print($resultStr);'."\n";
                  }
              }
          }
          if(defined($execEpilog) && length($execEpilog) > 0) {
              $evalStr .= $execEpilog.';'."\n";
          }
      } else {
          @sys::numVars = matchString($nextStr,$toSearch,$filteredRexFlags);
          if(!defined($regexpMatched) || length($regexpMatched) == 0 || $regexpMatched == 0) {
              if(!$doEvaluate || !$doExecuteOnly) {
                  print($str);
              }
              return 2;
          }
          if(defined($execProlog) && length($execProlog) > 0) {
              $evalStr .= $execProlog.';'."\n";
          }
          if(!$doEvaluate) {
              # Text replacement: $prevStr collects the finished part of the
              # text, $nextStr is the part still to be searched. Each step
              # replaces the match at the start of $nextStr by the expanded
              # replace pattern and moves the replaced text to $prevStr.
              $evalStr .=
                  '$prevStr = substr($nextStr,0,$regexpMatchOffset);'."\n".
                  '$nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
                  '$expandStr = expandString($toReplace,@sys::numVars);'."\n".
                  '$expandStrLen = length($expandStr);'."\n".
                  '$nextStr = substString($nextStr,$toSearch,$expandStr,$filteredRexFlags);'."\n".
                  'if(defined($nextStr) && length($nextStr) > 0) {'."\n".
                  ' $prevStr .= substr($nextStr,0,$expandStrLen);'."\n".
                  ' $nextStr = $expandStrLen < length($nextStr) ? substr($nextStr,$expandStrLen) : "";'."\n".
                  ' $breakSearch = $breakSearch ? 1 : (length($nextStr) == 0 ? 1 : 0);'."\n".
                  ' while(!$breakSearch) {'."\n".
                  ' @sys::numVars = matchString($nextStr,$toSearch,$filteredRexFlags);'."\n".
                  ' $breakSearch = !(defined($regexpMatched) && length($regexpMatched) != 0 && $regexpMatched != 0);'."\n".
                  ' if(!$breakSearch) {'."\n".
                  ' $prevStr .= substr($nextStr,0,$regexpMatchOffset);'."\n".
                  ' $nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
                  ' $expandStr = expandString($toReplace,@sys::numVars);'."\n".
                  ' $expandStrLen = length($expandStr);'."\n".
                  ' $nextStr = substString($nextStr,$toSearch,$expandStr,$filteredRexFlags);'."\n".
                  ' if(defined($nextStr) && length($nextStr) > 0) {'."\n".
                  ' $prevStr .= substr($nextStr,0,$expandStrLen);'."\n".
                  ' $nextStr = $expandStrLen < length($nextStr) ? substr($nextStr,$expandStrLen) : "";'."\n".
                  ' } else {'."\n".
                  ' $breakSearch = 1;'."\n".
                  ' }'."\n".
                  ' } else {'."\n".
                  ' $breakSearch = 1;'."\n".
                  ' }'."\n".
                  ' }'."\n".
                  '}'."\n".
                  '$newStr = (defined($prevStr) ? $prevStr : "").(defined($nextStr) ? $nextStr : "");'."\n".
                  'if(length($newStr) > 0) {'."\n".
                  ' print($newStr);'."\n".
                  '} else {'."\n".
                  ' print($str);'."\n".
                  '}'."\n";
          } else {
              # Evaluated replacement: same walk over the text, but the replace
              # pattern is executed by substString() and the length of the
              # inserted text is taken from $regexpNextOffset.
              $evalStr .=
                  '$prevStr = substr($nextStr,0,$regexpMatchOffset);'."\n".
                  '$nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
                  '$nextStr = substString($nextStr,$toSearch,$toReplace,$filteredRexFlags);'."\n".
                  'if(defined($nextStr) && length($nextStr) > 0) {'."\n".
                  ' $prevStr .= substr($nextStr,0,$regexpNextOffset);'."\n".
                  ' $nextStr = $regexpNextOffset < length($nextStr) ? substr($nextStr,$regexpNextOffset) : "";'."\n".
                  ' $breakSearch = $breakSearch ? 1 : (length($nextStr) == 0 ? 1 : 0);'."\n".
                  ' while(!$breakSearch) {'."\n".
                  ' @sys::numVars = matchString($nextStr,$toSearch,$filteredRexFlags);'."\n".
                  ' $breakSearch = !(defined($regexpMatched) && length($regexpMatched) != 0 && $regexpMatched != 0);'."\n".
                  ' if(!$breakSearch) {'."\n".
                  ' $prevStr .= substr($nextStr,0,$regexpMatchOffset);'."\n".
                  ' $nextStr = $regexpMatchOffset < length($nextStr) ? substr($nextStr,$regexpMatchOffset) : "";'."\n".
                  ' $nextStr = substString($nextStr,$toSearch,$toReplace,$filteredRexFlags);'."\n".
                  ' if(defined($nextStr) && length($nextStr) > 0) {'."\n".
                  ' $prevStr .= substr($nextStr,0,$regexpNextOffset);'."\n".
                  ' $nextStr = $regexpNextOffset < length($nextStr) ? substr($nextStr,$regexpNextOffset) : "";'."\n".
                  ' } else {'."\n".
                  ' $breakSearch = 1;'."\n".
                  ' }'."\n".
                  ' } else {'."\n".
                  ' $breakSearch = 1;'."\n".
                  ' }'."\n".
                  ' }'."\n".
                  '}'."\n".
                  '$newStr = (defined($prevStr) ? $prevStr : "").(defined($nextStr) ? $nextStr : "");'."\n".
                  'if(!$doExecuteOnly) {'."\n".
                  ' if(length($newStr) > 0) {'."\n".
                  ' print($newStr);'."\n".
                  ' } else {'."\n".
                  ' print($str);'."\n".
                  ' }'."\n".
                  '}'."\n";
          }
          if(defined($execEpilog) && length($execEpilog) > 0) {
              # Append: the epilog runs after the substitution code, it must
              # not replace it.
              $evalStr .= $execEpilog.';'."\n";
          }
      }

      if(defined($evalStr) && length($evalStr) > 0) {
          eval($evalStr);
          if($@) {
              # Typically a syntax or run time error in the prolog/epilog code.
              warn("evaluateSearchPattern: ".$@);
          }
      }

      return 0;
  }
  484.  
  my $resultStr;

  # Run the search over the collected input; the status returned by
  # evaluateSearchPattern() becomes the exit code of the script.
  exit(evaluateSearchPattern(
      $doMatchOnly,$doEvaluate,$doExecuteOnly,
      $buffer,$matchStr,$replaceStr,$execPrologStr,$execEpilogStr,$rexFlags));
Надо было сделать под "юниксы" полноценный поиск и замену многострочного текста по паттерну, пришлось сваять скрипт

http://www.rsdn.ru/Forum/message/3115750.1.aspx

+добавить реализацию