comparison snpEff_2_1a/scripts/proteinFasta2NM.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f8eaa3f8194b
1 #!/usr/bin/perl
2
3 #-------------------------------------------------------------------------------
4 #
5 # Convert fasta headers from protein ID (NP_XXXX) into transcript ID NM_XXXX
6 #
7 #
8 # Pablo Cingolani
9 #-------------------------------------------------------------------------------
10
11 # Parse command line arguments
12 $refLink = $ARGV[0];
13 die "Usage: cat file.fasta | ./proteinFasta2NM.pl refLink.txt > protein_NM.fasta" if $refLink eq '';
14
15 # Read refLink file
16 open RL, $refLink or die "Cannot opne file '$refLink'\n";
17 while( <RL> ) {
18 chomp;
19 @t = split /\t/;
20 ($nm, $np) = ($t[2], $t[3]);
21
22 if( $np ne '' ) {
23 if( $trId{$np} ne '' ) { print STDERR "Error: Non empty entry '$np' = $trId{$np}\n"; }
24 else { $trId{$np} = $nm; }
25 }
26 }
27
28 # Read fasta file
29 while( <STDIN> ) {
30 chomp;
31
32 if( /^>(.*)/ ) { # Header? => change form protein NP_XXX to transcript NM_XXXX
33 # Get NM_ field
34 @t = split /\|/;
35 $np = $t[3];
36
37 # Remove anything after the dot
38 if( $np =~ /(.*)\./ ) { $np = $1; }
39
40 # Found a transcript ID?
41 if( $trId{$np} ne '' ) { print ">$trId{$np}\n"; }
42 else { print "$l\n"; }
43 } else { print "$_\n"; } # Show line
44 }
45