annotate snpEff_2_1a/scripts/proteinFasta2NM.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
1 #!/usr/bin/perl
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
2
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
3 #-------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
4 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
5 # Convert fasta headers from protein ID (NP_XXXX) into transcript ID NM_XXXX
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
6 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
7 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
8 # Pablo Cingolani
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
9 #-------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
10
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
11 # Parse command line arguments
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
12 $refLink = $ARGV[0];
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
13 die "Usage: cat file.fasta | ./proteinFasta2NM.pl refLink.txt > protein_NM.fasta" if $refLink eq '';
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
14
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
15 # Read refLink file
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
16 open RL, $refLink or die "Cannot opne file '$refLink'\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
17 while( <RL> ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
18 chomp;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
19 @t = split /\t/;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
20 ($nm, $np) = ($t[2], $t[3]);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
21
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
22 if( $np ne '' ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
23 if( $trId{$np} ne '' ) { print STDERR "Error: Non empty entry '$np' = $trId{$np}\n"; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
24 else { $trId{$np} = $nm; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
25 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
26 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
27
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
28 # Read fasta file
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
29 while( <STDIN> ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
30 chomp;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
31
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
32 if( /^>(.*)/ ) { # Header? => change form protein NP_XXX to transcript NM_XXXX
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
33 # Get NM_ field
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
34 @t = split /\|/;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
35 $np = $t[3];
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
36
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
37 # Remove anything after the dot
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
38 if( $np =~ /(.*)\./ ) { $np = $1; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
39
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
40 # Found a transcript ID?
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
41 if( $trId{$np} ne '' ) { print ">$trId{$np}\n"; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
42 else { print "$l\n"; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
43 } else { print "$_\n"; } # Show line
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
44 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
45