annotate snpEff_2_1a/scripts/vcfEffOnePerLine.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
#!/usr/bin/perl

#-------------------------------------------------------------------------------
#
# Read a VCF file (via STDIN), split EFF fields from INFO column into many lines
# leaving one line per effect.
#
# Note: In lines having multiple effects, all other information will be
#       repeated. Only the 'EFF' field will change (it is moved to the end
#       of the INFO column).
#
# Header lines, records without an INFO column, and records carrying at most
# one effect are echoed unchanged.
#
#                                                           Pablo Cingolani 2012
#-------------------------------------------------------------------------------

use strict;
use warnings;

# Zero-based index of the INFO column in a tab-separated VCF record.
my $INFO_FIELD_NUM = 7;

while ( my $line = <STDIN> ) {
    # Show header lines untouched (they keep their own line terminator).
    if ( $line =~ /^#/ ) {
        print $line;
        next;
    }

    chomp $line;
    print "$_\n" for expand_eff_line($line);
}

# Expand one chomped VCF record into one record per effect.
#
# Parameters:
#   $line - a single non-header VCF line, without its trailing newline.
#
# Returns:
#   A list of output lines (no trailing newline). The list holds the input
#   line itself when the record has no INFO column or its EFF entry lists
#   fewer than two effects; otherwise one line per effect, each with the
#   remaining INFO entries followed by a single-valued 'EFF=' entry.
sub expand_eff_line {
    my ($line) = @_;

    # LIMIT of -1 keeps trailing empty columns, so every expanded record has
    # the same number of columns as the input record.
    my @fields = split /\t/, $line, -1;

    # Too few columns to contain INFO: nothing to split.
    return $line if $#fields < $INFO_FIELD_NUM;

    my @effects;        # individual effects taken from the EFF entry
    my @other_infos;    # every INFO entry that is not EFF, in input order

    for my $entry ( split /;/, $fields[$INFO_FIELD_NUM] ) {
        # Is this the EFF field? => Find it and split it.
        # If INFO carries several EFF entries, the last one wins.
        if ( $entry =~ /^EFF=(.*)/ ) {
            @effects = split /,/, $1;
        }
        # Empty entries ahead of the first real one are skipped so that the
        # rebuilt INFO column never starts with a stray ';'.
        elsif ( @other_infos || $entry ne '' ) {
            push @other_infos, $entry;
        }
    }

    # No EFF found (or just one effect): show the line as it came in.
    return $line if @effects <= 1;

    my @pre  = @fields[ 0 .. $INFO_FIELD_NUM - 1 ];
    my @post = @fields[ $INFO_FIELD_NUM + 1 .. $#fields ];

    return map {
        join "\t", @pre, join( ';', @other_infos, "EFF=$_" ), @post
    } @effects;
}