annotate Scan_IUPAC_output_each_match.pl @ 3:b4e22d2d7fa7 draft

Add missing Perl scripts from previous uploads.
author pjbriggs
date Mon, 09 Apr 2018 04:45:06 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
1 #! /usr/bin/perl
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
2
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
3 use strict;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
4 use FileHandle;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
5 use Bio::SeqIO;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
6 #use Statistics::Descriptive;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
7
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
8 #####
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
9 # Program to count all occurences of a particular REGEX
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
10 # in a file containing mutiple FASTA sequences.
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
11 # 11 September 2003. Ian Donaldson.
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
12 # Revised to convert IUPAC to regex
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
13 # Revised to read a multiple FASTA file
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
14 # was CountRegexGFF_IUPAC_1input_nosummary.pl
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
15 #####
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
16
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
17 #### File handles
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
18 my $input = new FileHandle;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
19 my $output = new FileHandle;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
20
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
21 #### Variables
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
22 my $file_number = 0;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
23 my $count_fwd_regex = 0;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
24 my $count_rvs_regex = 0;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
25 my $count_all_regex = 0;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
26 my $seq_tally = 0;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
27 my @seq_totals = ();
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
28
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
29 #### Command line usage
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
30 if(@ARGV != 5) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
31 die ("USAGE:
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
32 $0
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
33 IUPAC
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
34 Multiple FASTA input file
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
35 Output
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
36 Label
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
37 Skip palindromic (0=F+R-default|1=F only)\n\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
38 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
39
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
40 #### Search forward strand only?
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
41 my $skip = $ARGV[4];
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
42 unless($skip =~ /^[01]$/) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
43 die("Only accept 0 or 1 for Skip!\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
44 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
45
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
46 #### Process IUPAC string
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
47 my $iupac = $ARGV[0];
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
48 chomp $iupac;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
49 $iupac = uc($iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
50
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
51 if($iupac !~ /^[ACGTRYMKWSBDHVN]+$/) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
52 die("A non-IUPAC character was detected in your input string!\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
53 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
54
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
55 #### Forward strand IUPAC
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
56 my @fwd_iupac_letters = split(//, $iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
57 my @fwd_regex_list = ();
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
58
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
59 foreach my $letter (@fwd_iupac_letters) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
60 my $converted_iupac = iupac2regex($letter);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
61 push(@fwd_regex_list, $converted_iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
62 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
63
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
64 my $fwd_regex = join('', @fwd_regex_list);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
65
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
66
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
67 #### Reverse strand IUPAC
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
68 my $revcomp_iupac = RevCompIUPAC($iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
69 my @rev_iupac_letters = split(//, $revcomp_iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
70 my @rev_regex_list = ();
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
71
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
72 foreach my $letter (@rev_iupac_letters) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
73 my $converted_iupac = iupac2regex($letter);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
74 push(@rev_regex_list, $converted_iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
75 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
76
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
77 my $rvs_regex = join('', @rev_regex_list);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
78
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
79 #### Other variables
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
80 my $label = $ARGV[3];
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
81
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
82 if($label !~ /^[\w\d]+$/) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
83 die("A non-letter/number character was detected in your label string!\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
84 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
85
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
86 my $length = length($iupac);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
87
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
88 #### Open output file
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
89 $output->open(">$ARGV[2]") or die "Could not open output file $ARGV[2]!\n";
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
90 #$output->print("##gff-version 2\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
91
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
92 #if($skip == 0) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
93 # $output->print("##Pattern search: $iupac and $revcomp_iupac\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
94 #}
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
95
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
96 #else {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
97 # $output->print("##Pattern search: $iupac\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
98 #}
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
99
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
100 #### Work thru FASTA entries in the input file with SeqIO
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
101 my $seqio = Bio::SeqIO->new(-file => "$ARGV[1]" , '-format' => 'Fasta');
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
102
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
103 while( my $seqobj = $seqio->next_seq() ) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
104 $seq_tally++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
105 my $this_seq_tally = 0;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
106 my $sequence = $seqobj->seq(); # actual sequence as a string
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
107 my $seq_id = $seqobj->id(); # header
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
108 #print(">$seq_id\n$seq\n\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
109
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
110 #$output->print(">$seq_id\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
111
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
112 #### Clean up $sequence to leave only nucleotides
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
113 $sequence =~ s/[\s\W\d]//g;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
114
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
115 while ($sequence =~ /($fwd_regex)/ig) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
116 $this_seq_tally++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
117 $count_fwd_regex++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
118 $count_all_regex++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
119
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
120 my $end_position = pos($sequence);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
121 my $start_position = $end_position - ($length - 1);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
122 $output->print("$seq_id\tRegexSearch\tCNS\t$start_position\t$end_position\t.\t+\t.\t$label\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
123 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
124
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
125 #### Count reverse REGEX
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
126 unless($skip == 1) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
127 while ($sequence =~ /($rvs_regex)/ig) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
128 $this_seq_tally++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
129 $count_rvs_regex++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
130 $count_all_regex++;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
131
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
132 my $end_position = pos($sequence);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
133 my $start_position = $end_position - ($length - 1);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
134 $output->print("$seq_id\tRegexSearch\tCNS\t$start_position\t$end_position\t.\t-\t.\t$label\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
135 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
136
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
137 push(@seq_totals, $this_seq_tally);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
138 #$output->print("$this_seq_tally matches\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
139 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
140 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
141
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
142 #### Mean motifs per seq
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
143 #my $stat = Statistics::Descriptive::Full->new();
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
144 #$stat->add_data(@seq_totals);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
145 #my $mean = $stat->mean();
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
146
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
147
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
148 #### Print a summary file
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
149 if($skip == 0) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
150 # $output->print("##Forward: $fwd_regex. Reverse: $rvs_regex.\n",
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
151 # "##$count_fwd_regex on the forward strand.\n",
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
152 # "##$count_rvs_regex on the reverse strand.\n",
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
153 # "##$count_all_regex in total.\n",
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
154 # "##$seq_tally sequences. Mean motifs per seq = $mean\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
155 #
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
156 print STDOUT "There were $count_all_regex instances of $fwd_regex and $rvs_regex.\n";
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
157 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
158
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
159 if($skip == 1) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
160 # $output->print("##Forward: $fwd_regex.\n",
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
161 # "##$count_fwd_regex on the forward strand.\n",
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
162 # "##$seq_tally sequences. Mean motifs per seq = $mean\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
163 #
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
164 print STDOUT "There were $count_fwd_regex instances of $fwd_regex on the forward strand.\n";
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
165 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
166
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
167 $output->close;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
168
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
169 exit;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
170
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
171 sub iupac2regex {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
172 # Convert IUPAC codes to REGEX
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
173 my $iupac = shift;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
174
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
175 #### Series of regexes to convert
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
176 if($iupac =~ /A/) { return 'A' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
177 if($iupac =~ /C/) { return 'C' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
178 if($iupac =~ /G/) { return 'G' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
179 if($iupac =~ /T/) { return 'T' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
180 if($iupac =~ /M/) { return '[AC]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
181 if($iupac =~ /R/) { return '[AG]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
182 if($iupac =~ /W/) { return '[AT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
183 if($iupac =~ /S/) { return '[CG]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
184 if($iupac =~ /Y/) { return '[CT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
185 if($iupac =~ /K/) { return '[GT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
186 if($iupac =~ /V/) { return '[ACG]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
187 if($iupac =~ /H/) { return '[ACT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
188 if($iupac =~ /D/) { return '[AGT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
189 if($iupac =~ /B/) { return '[CGT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
190 if($iupac =~ /N/) { return '[ACGT]' }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
191
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
192 die("IUPAC not recognised by sub iupac2regex!\n");
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
193 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
194
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
195 sub RevCompIUPAC {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
196 my $iupac_string = shift;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
197 my @converted_list = ();
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
198
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
199 my @iupac_string_list = split(//, $iupac_string);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
200
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
201 @iupac_string_list = reverse(@iupac_string_list);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
202
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
203 foreach my $letter (@iupac_string_list) {
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
204 $letter =~ tr/ACGTRYMKWSBDHVN/TGCAYRKMWSVHDBN/;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
205 push(@converted_list, $letter);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
206 }
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
207
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
208 my $joined_list = join('', @converted_list);
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
209 return $joined_list;
b4e22d2d7fa7 Add missing Perl scripts from previous uploads.
pjbriggs
parents:
diff changeset
210 }