Mercurial > repos > greg > snpeff_v2_from_pablo
diff snpEff_2_1a/scripts/proteinFasta2NM.pl @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
| author | greg |
|---|---|
| date | Fri, 20 Apr 2012 14:47:09 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpEff_2_1a/scripts/proteinFasta2NM.pl Fri Apr 20 14:47:09 2012 -0400 @@ -0,0 +1,45 @@ +#!/usr/bin/perl + +#------------------------------------------------------------------------------- +# +# Convert fasta headers from protein ID (NP_XXXX) into transcript ID NM_XXXX +# +# +# Pablo Cingolani +#------------------------------------------------------------------------------- + +# Parse command line arguments +$refLink = $ARGV[0]; +die "Usage: cat file.fasta | ./proteinFasta2NM.pl refLink.txt > protein_NM.fasta" if $refLink eq ''; + +# Read refLink file +open RL, $refLink or die "Cannot opne file '$refLink'\n"; +while( <RL> ) { + chomp; + @t = split /\t/; + ($nm, $np) = ($t[2], $t[3]); + + if( $np ne '' ) { + if( $trId{$np} ne '' ) { print STDERR "Error: Non empty entry '$np' = $trId{$np}\n"; } + else { $trId{$np} = $nm; } + } +} + +# Read fasta file +while( <STDIN> ) { + chomp; + + if( /^>(.*)/ ) { # Header? => change form protein NP_XXX to transcript NM_XXXX + # Get NM_ field + @t = split /\|/; + $np = $t[3]; + + # Remove anything after the dot + if( $np =~ /(.*)\./ ) { $np = $1; } + + # Found a transcript ID? + if( $trId{$np} ne '' ) { print ">$trId{$np}\n"; } + else { print "$l\n"; } + } else { print "$_\n"; } # Show line +} +
