Mercurial > repos > greg > snpeff_v2_from_pablo
diff snpEff_2_1a/scripts/vcfEffOnePerLine.pl @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
| author | greg |
|---|---|
| date | Fri, 20 Apr 2012 14:47:09 -0400 |
| parents | |
| children |
line wrap: on
line diff
#!/usr/bin/perl

#-------------------------------------------------------------------------------
#
# Read a VCF file (via STDIN), split EFF fields from INFO column into many lines
# leaving one line per effect.
#
# Note: In lines having multiple effects, all other information will be
#       repeated. Only the 'EFF' field will change, and it is moved to the
#       end of the INFO column.
#
#                                                           Pablo Cingolani 2012
#-------------------------------------------------------------------------------

use strict;
use warnings;

# Zero-based index of the INFO column in a tab-separated VCF data line.
my $INFO_FIELD_NUM = 7;

# Expand one (already chomped) VCF data line into one line per effect.
#
# Parameters:
#   $line - a non-header VCF line without its trailing newline.
#
# Returns:
#   A list of output lines (without trailing newlines). When the INFO column
#   carries zero or one effect, the list holds the input line unchanged.
#   Otherwise it holds one line per effect, each with the other INFO entries
#   followed by a single "EFF=<effect>" entry.
sub split_eff_line {
    my ($line) = @_;

    # LIMIT of -1 keeps trailing empty columns; without it Perl's split drops
    # them and the rebuilt lines would have fewer columns than the input.
    my @fields = split /\t/, $line, -1;

    # Lines too short to have an INFO column are passed through untouched.
    my $info = $fields[$INFO_FIELD_NUM];
    return ($line) unless defined $info;

    # Separate the EFF entry from every other INFO entry.
    my @effs;
    my @other_infos;
    for my $inf ( split /;/, $info ) {
        if ( $inf =~ /^EFF=(.*)/ ) {
            @effs = split /,/, $1;
        }
        else {
            push @other_infos, $inf;
        }
    }

    # Nothing to split: no EFF entry, or an EFF entry with a single effect.
    return ($line) if @effs <= 1;

    my @before = @fields[ 0 .. $INFO_FIELD_NUM - 1 ];
    my @after  = @fields[ $INFO_FIELD_NUM + 1 .. $#fields ];

    my @lines;
    for my $eff (@effs) {
        my $new_info = join ';', @other_infos, "EFF=$eff";
        push @lines, join "\t", @before, $new_info, @after;
    }
    return @lines;
}

# Filter STDIN to STDOUT: header lines are copied verbatim, data lines are
# expanded to one line per effect.
sub main {
    while ( my $line = <STDIN> ) {

        # Show header lines
        if ( $line =~ /^#/ ) {
            print $line;
            next;
        }

        chomp $line;
        print "$_\n" for split_eff_line($line);
    }
    return;
}

# Run only when executed as a script, so that split_eff_line can be loaded
# (e.g. via require) without consuming STDIN.
main() unless caller;

1;
