|
0
|
1 #!/usr/bin/perl
|
|
|
2
|
|
|
3 #-------------------------------------------------------------------------------
|
|
|
4 #
|
|
|
5 # Convert fasta headers from protein ID (NP_XXXX) into transcript ID NM_XXXX
|
|
|
6 #
|
|
|
7 #
|
|
|
8 # Pablo Cingolani
|
|
|
9 #-------------------------------------------------------------------------------
|
|
|
10
|
|
|
11 # Parse command line arguments
|
|
|
12 $refLink = $ARGV[0];
|
|
|
13 die "Usage: cat file.fasta | ./proteinFasta2NM.pl refLink.txt > protein_NM.fasta" if $refLink eq '';
|
|
|
14
|
|
|
15 # Read refLink file
|
|
|
16 open RL, $refLink or die "Cannot opne file '$refLink'\n";
|
|
|
17 while( <RL> ) {
|
|
|
18 chomp;
|
|
|
19 @t = split /\t/;
|
|
|
20 ($nm, $np) = ($t[2], $t[3]);
|
|
|
21
|
|
|
22 if( $np ne '' ) {
|
|
|
23 if( $trId{$np} ne '' ) { print STDERR "Error: Non empty entry '$np' = $trId{$np}\n"; }
|
|
|
24 else { $trId{$np} = $nm; }
|
|
|
25 }
|
|
|
26 }
|
|
|
27
|
|
|
28 # Read fasta file
|
|
|
29 while( <STDIN> ) {
|
|
|
30 chomp;
|
|
|
31
|
|
|
32 if( /^>(.*)/ ) { # Header? => change form protein NP_XXX to transcript NM_XXXX
|
|
|
33 # Get NM_ field
|
|
|
34 @t = split /\|/;
|
|
|
35 $np = $t[3];
|
|
|
36
|
|
|
37 # Remove anything after the dot
|
|
|
38 if( $np =~ /(.*)\./ ) { $np = $1; }
|
|
|
39
|
|
|
40 # Found a transcript ID?
|
|
|
41 if( $trId{$np} ne '' ) { print ">$trId{$np}\n"; }
|
|
|
42 else { print "$l\n"; }
|
|
|
43 } else { print "$_\n"; } # Show line
|
|
|
44 }
|
|
|
45
|