#!/usr/bin/perl

#-------------------------------------------------------------------------------
#
# Read a VCF file (via STDIN), split EFF fields from INFO column into many lines
# leaving one line per effect.
#
# Note: In lines having multiple effects, all other information will be
#       repeated. Only the 'EFF' field will change.
#
#                                                          Pablo Cingolani 2012
#-------------------------------------------------------------------------------

use strict;
use warnings;

# Zero-based index of the INFO column in a tab-separated VCF record.
my $INFO_FIELD_NUM = 7;

# expand_eff_line( $line )
#
# Takes one chomped, non-header VCF record and returns the list of records
# (without trailing newline) that should be printed for it:
#   - the record itself, unchanged, when its INFO column holds fewer than
#     two effects (no EFF entry, an empty EFF entry, or a single effect);
#   - otherwise one record per effect, where every column is repeated and
#     the INFO column contains the non-EFF entries followed by 'EFF=<effect>'.
sub expand_eff_line {
    my ($line) = @_;

    # A negative LIMIT keeps trailing empty columns; without it they would be
    # dropped from expanded records but kept in records printed verbatim.
    my @fields = split /\t/, $line, -1;

    # Records too short to have an INFO column cannot carry effects.
    my $info = $fields[$INFO_FIELD_NUM];
    return ($line) unless defined $info;

    # Separate the EFF entry from every other INFO entry.
    # If EFF appears more than once, the last occurrence wins.
    my @effects;
    my @other_infos;
    for my $entry ( split /;/, $info ) {
        if ( $entry =~ /^EFF=(.*)/ ) {
            @effects = split /,/, $1;
        }
        else {
            push @other_infos, $entry;
        }
    }

    # Nothing to split: keep the record exactly as it was read.
    return ($line) if @effects < 2;

    my @before = @fields[ 0 .. $INFO_FIELD_NUM - 1 ];
    my @after  = @fields[ $INFO_FIELD_NUM + 1 .. $#fields ];

    my @records;
    for my $effect (@effects) {
        my $new_info = join ';', @other_infos, "EFF=$effect";
        push @records, join( "\t", @before, $new_info, @after );
    }
    return @records;
}

# main()
#
# Filters STDIN to STDOUT: header lines (starting with '#') are copied as-is,
# every other line is expanded to one output line per effect.
sub main {
    while ( my $line = <STDIN> ) {

        # Show header lines
        if ( $line =~ /^#/ ) {
            print $line;
            next;
        }

        chomp $line;
        print "$_\n" for expand_eff_line($line);
    }
    return;
}

# Run the filter only when executed as a script, so the file can also be
# loaded with 'require' (e.g. to test expand_eff_line) without reading STDIN.
main() unless caller;

1;