| 0 | 1 #!/usr/bin/env perl | 
|  | 2 use strict; | 
|  | 3 use warnings; | 
|  | 4 use Getopt::Std; | 
|  | 5 | 
|  | 6 sub parse_command_line(); | 
|  | 7 sub build_regex_string(); | 
|  | 8 sub usage(); | 
|  | 9 | 
|  | 10 my $input_file ; | 
|  | 11 my $output_file; | 
|  | 12 my $find_pattern ; | 
|  | 13 my $replace_pattern ; | 
|  | 14 my $find_complete_words ; | 
|  | 15 my $find_pattern_is_regex ; | 
|  | 16 my $find_in_specific_column ; | 
|  | 17 my $find_case_insensitive ; | 
|  | 18 my $replace_global ; | 
|  | 19 my $skip_first_line ; | 
|  | 20 | 
|  | 21 | 
|  | 22 ## | 
|  | 23 ## Program Start | 
|  | 24 ## | 
|  | 25 usage() if @ARGV<2; | 
|  | 26 parse_command_line(); | 
|  | 27 my $regex_string = build_regex_string() ; | 
|  | 28 | 
|  | 29 # Allow first line to pass without filtering? | 
|  | 30 if ( $skip_first_line ) { | 
|  | 31 	my $line = <$input_file>; | 
|  | 32 	print $output_file $line ; | 
|  | 33 } | 
|  | 34 | 
|  | 35 | 
|  | 36 ## | 
|  | 37 ## Main loop | 
|  | 38 ## | 
|  | 39 | 
|  | 40 ## I LOVE PERL (and hate it, at the same time...) | 
|  | 41 ## | 
|  | 42 ## So what's going on with the self-compiling perl code? | 
|  | 43 ## | 
|  | 44 ## 1. The program gets the find-pattern and the replace-pattern from the user (as strings). | 
|  | 45 ## 2. If both the find-pattern and replace-pattern are simple strings (not regex), | 
|  | 46 ##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///' | 
|  | 47 ## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references) | 
|  | 48 ##    it is still possible to pre-compile the regex and use it in a 's///' | 
|  | 49 ## However, | 
|  | 50 ## 4. If the replace-pattern contains back-references, pre-compiling is not possible. | 
|  | 51 ##    (in perl, you can't precompile a substitute regex). | 
|  | 52 ##    See these examples: | 
|  | 53 ##    http://www.perlmonks.org/?node_id=84420 | 
|  | 54 ##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl | 
|  | 55 ## | 
|  | 56 ##    The solution: | 
|  | 57 ##    we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ), | 
|  | 58 ##    Then eval() a new perl code that contains the substitution regex as inlined code. | 
|  | 59 ##    Gotta love perl! | 
|  | 60 | 
|  | 61 my $perl_program ; | 
|  | 62 if ( $find_in_specific_column ) { | 
|  | 63 	# Find & replace in specific column | 
|  | 64 | 
|  | 65 	$perl_program = <<EOF; | 
|  | 66 	while ( <STDIN> ) { | 
|  | 67 		chomp ; | 
|  | 68 		my \@columns = split ; | 
|  | 69 | 
|  | 70 		#not enough columns in this line - skip it | 
|  | 71 		next if ( \@columns < $find_in_specific_column ) ; | 
|  | 72 | 
|  | 73 		\$columns [ $find_in_specific_column - 1 ] =~ $regex_string ; | 
|  | 74 | 
|  | 75 		print STDOUT join("\t", \@columns), "\n" ; | 
|  | 76 	} | 
|  | 77 EOF | 
|  | 78 | 
|  | 79 } else { | 
|  | 80 	# Find & replace the entire line | 
|  | 81 	$perl_program = <<EOF; | 
|  | 82 		while ( <STDIN> ) { | 
|  | 83 			$regex_string ; | 
|  | 84 			print STDOUT; | 
|  | 85 		} | 
|  | 86 EOF | 
|  | 87 } | 
|  | 88 | 
|  | 89 | 
|  | 90 # The dynamic perl code reads from STDIN and writes to STDOUT, | 
|  | 91 # so connect these handles (if the user didn't specifiy input / output | 
|  | 92 # file names, these might be already be STDIN/OUT, so the whole could be a no-op). | 
|  | 93 *STDIN = $input_file ; | 
|  | 94 *STDOUT = $output_file ; | 
|  | 95 eval $perl_program ; | 
|  | 96 | 
|  | 97 | 
|  | 98 ## | 
|  | 99 ## Program end | 
|  | 100 ## | 
|  | 101 | 
|  | 102 | 
|  | 103 sub parse_command_line() | 
|  | 104 { | 
|  | 105 	my %opts ; | 
|  | 106 	getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n"; | 
|  | 107 | 
|  | 108 	die "$0: missing Find-Pattern argument\n" if (@ARGV==0); | 
|  | 109 	$find_pattern = $ARGV[0]; | 
|  | 110 	die "$0: missing Replace-Pattern argument\n" if (@ARGV==1); | 
|  | 111 	$replace_pattern = $ARGV[1]; | 
|  | 112 | 
|  | 113 	$find_complete_words = ( exists $opts{w} ) ; | 
|  | 114 	$find_case_insensitive = ( exists $opts{i} ) ; | 
|  | 115 	$skip_first_line = ( exists $opts{s} ) ; | 
|  | 116 	$find_pattern_is_regex = ( exists $opts{r} ) ; | 
|  | 117 	$replace_global = ( exists $opts{g} ) ; | 
|  | 118 | 
|  | 119 	# Search in specific column ? | 
|  | 120 	if ( defined $opts{c} ) { | 
|  | 121 		$find_in_specific_column = $opts{c}; | 
|  | 122 | 
|  | 123 		die "$0: invalid column number ($find_in_specific_column).\n" | 
|  | 124 			unless $find_in_specific_column =~ /^\d+$/ ; | 
|  | 125 | 
|  | 126 		die "$0: invalid column number ($find_in_specific_column).\n" | 
|  | 127 			if $find_in_specific_column <= 0; | 
|  | 128 	} | 
|  | 129 	else { | 
|  | 130 		$find_in_specific_column = 0 ; | 
|  | 131 	} | 
|  | 132 | 
|  | 133 	# Output File specified (instead of STDOUT) ? | 
|  | 134 	if ( defined $opts{o} ) { | 
|  | 135 		my $filename = $opts{o}; | 
|  | 136 		open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ; | 
|  | 137 	} else { | 
|  | 138 		$output_file = *STDOUT ; | 
|  | 139 	} | 
|  | 140 | 
|  | 141 | 
|  | 142 	# Input file Specified (instead of STDIN) ? | 
|  | 143 	if ( @ARGV>2 ) { | 
|  | 144 		my $filename = $ARGV[2]; | 
|  | 145 		open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ; | 
|  | 146 	} else { | 
|  | 147 		$input_file = *STDIN; | 
|  | 148 	} | 
|  | 149 } | 
|  | 150 | 
|  | 151 sub build_regex_string() | 
|  | 152 { | 
|  | 153 	my $find_string ; | 
|  | 154 	my $replace_string ; | 
|  | 155 | 
|  | 156 	if ( $find_pattern_is_regex ) { | 
|  | 157 		$find_string = $find_pattern ; | 
|  | 158 		$replace_string = $replace_pattern ; | 
|  | 159 	} else { | 
|  | 160 		$find_string = quotemeta $find_pattern ; | 
|  | 161 		$replace_string = quotemeta $replace_pattern; | 
|  | 162 	} | 
|  | 163 | 
|  | 164 	if ( $find_complete_words ) { | 
|  | 165 		$find_string = "\\b($find_string)\\b"; | 
|  | 166 	} | 
|  | 167 | 
|  | 168 	my $regex_string = "s/$find_string/$replace_string/"; | 
|  | 169 | 
|  | 170 	$regex_string .= "i" if ( $find_case_insensitive ); | 
|  | 171 	$regex_string .= "g" if ( $replace_global ) ; | 
|  | 172 | 
|  | 173 | 
|  | 174 	return $regex_string; | 
|  | 175 } | 
|  | 176 | 
|  | 177 sub usage() | 
|  | 178 { | 
|  | 179 print <<EOF; | 
|  | 180 | 
|  | 181 Find and Replace | 
|  | 182 Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu ) | 
|  | 183 | 
|  | 184 Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-l] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE] | 
|  | 185 | 
|  | 186    -g   - Global replace - replace all occurences in line/column. | 
|  | 187           Default - replace just the first instance. | 
|  | 188    -w   - search for complete words (not partial sub-strings). | 
|  | 189    -i   - case insensitive search. | 
|  | 190    -c N - check only column N, instead of entire line (line split by whitespace). | 
|  | 191    -l   - skip first line (don't replace anything in it) | 
|  | 192    -r   - FIND-PATTERN and REPLACE-PATTERN are perl regular expression, | 
|  | 193           usable inside a 's///' statement. | 
|  | 194           By default, they are used as verbatim text strings. | 
|  | 195    -o OUT - specify output file (default = STDOUT). | 
|  | 196    INPUT-FILE - (optional) read from file (default = from STDIN). | 
|  | 197 | 
|  | 198 | 
|  | 199 EOF | 
|  | 200 | 
|  | 201 	exit; | 
|  | 202 } |