Mercurial > repos > iuc > fasta_stats
annotate fasta-stats.pl @ 2:d7421f5dbb4f draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit adc5e3616c1849551c9a712b651b0d1c6b0e88f1"
| author | iuc |
|---|---|
| date | Mon, 26 Apr 2021 10:01:17 +0000 |
| parents | 53c14c29c2fd |
| children | 5b072a9eaa9d |
| rev | line source |
|---|---|
|
0
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
2 |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
3 # fasta-stats |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
4 # written by torsten.seemann@monash.edu |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
5 # oct 2012 |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
6 |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
7 use strict; |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
8 use warnings; |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
9 use List::Util qw(sum min max); |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
10 |
|
1
53c14c29c2fd
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents:
0
diff
changeset
|
11 |
|
53c14c29c2fd
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents:
0
diff
changeset
|
# Parameters
#   1st argument: path of the FASTA file to summarise (required).
#   2nd argument: genome size (optional). When it is present the NG50/LG50
#                 statistics are computed in addition to N50/L50.
my $file = shift;

# Fail early with a usage line instead of a confusing error from open().
defined $file
    or die "Usage: $0 <fasta_file> [genome_size]\n";

my $calc_ng50   = 0;
my $genome_size = 0;
if (@ARGV) {
    $genome_size = $ARGV[0];
    $calc_ng50   = 1;
}
|
53c14c29c2fd
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents:
0
diff
changeset
|
20 |
|
0
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
# stat storage

my $n   = 0;     # number of header lines ('>') seen so far
my $seq = '';    # residues accumulated for the record currently being read
my %stat;        # statistic name => value; printed once everything is read
my @len;         # one entry per sequence handed to process()

# MAIN LOOP collecting sequences

# Open the file first. The three-argument form with a lexical handle keeps a
# file name beginning with '>' or '|' from being treated as an open mode, and
# die is given a plain string so the message really reaches the user.
open my $in, '<', $file
    or die "Couldn't open $file for reading\n$!";

while (my $line = <$in>) {
    chomp $line;
    if ($line =~ m/^\s*>/) {
        # A new header line closes the previous record, if there was one.
        process($seq) if $n;
        $n++;
        $seq = '';
    }
    else {
        # Remove whitespace (including the "\r" left by CRLF files) so it is
        # not counted as part of the sequence length.
        $line =~ s/\s+//g;
        $seq .= $line;
    }
}
close $in;

# The last record is not followed by a header line, so flush it here.
process($seq) if $n;
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
46 |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
# sort length array, longest first (calc_x50 accumulates from the front)
# (should use hash here for efficiency with huge no of short reads?)

@len = sort { $b <=> $a } @len;

# compute more stats

$stat{'num_seq'} = scalar(@len);

if (@len) {
    $stat{'num_bp'} = sum(@len);

    # @len is in descending order: the longest sequence is the first
    # element and the shortest one is the last.
    $stat{'len_max'} = $len[0];
    $stat{'len_min'} = $len[-1];

    # Element at position int(n/2) of the sorted list.
    $stat{'len_median'} = $len[ int(@len / 2) ];
    $stat{'len_mean'}   = int( $stat{'num_bp'} / $stat{'num_seq'} );

    # calculate n50
    # The threshold is exactly half of the total. Truncating it to an
    # integer would let the cumulative sum "reach" 50% one sequence too
    # early whenever the total is odd.
    ($stat{'len_N50'}, $stat{'L50'})
        = calc_x50(\@len, 0.5 * $stat{'num_bp'});

    # calculate NG50 (same walk, but against half of the given genome size)
    if ($calc_ng50) {
        ($stat{'len_NG50'}, $stat{'LG50'})
            = calc_x50(\@len, 0.5 * $genome_size);
    }
}

# calculate GC content
# The per-base counters only exist once process() has run, so treat missing
# ones as 0, and do not divide when no A/C/G/T was seen at all (empty input
# or sequences made only of other symbols).
my $num_gc = ($stat{'num_G'} || 0) + ($stat{'num_C'} || 0);
my $num_at = ($stat{'num_A'} || 0) + ($stat{'num_T'} || 0);
$stat{'num_bp_not_N'} = $num_gc + $num_at;
$stat{'GC_content'}   = $stat{'num_bp_not_N'}
                      ? $num_gc / $stat{'num_bp_not_N'} * 100
                      : 0;
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
77 |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
# print stats as .tsv

# One "name<TAB>value" row per statistic, ordered by name. The GC content is
# the only value formatted with one decimal place; everything else is printed
# as stored.
foreach my $key (sort keys %stat) {
    my $row_format = ($key =~ m/GC_content/) ? "%s\t%0.1f\n" : "%s\t%s\n";
    printf($row_format, $key, $stat{$key});
}
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
87 |
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
# run for each sequence

# process($sequence)
#   Adds the case-insensitive counts of A, G, T, C and N found in $sequence
#   to the running totals kept in %stat (keys "num_A", "num_G", "num_T",
#   "num_C", "num_N") and appends the length of $sequence to @len.
sub process {
    my ($sequence) = @_;

    # base composition
    foreach my $base (qw(A G T C N)) {
        # A global match in list context yields one item per occurrence;
        # assigning through the empty list turns that into a count.
        my $hits = () = $sequence =~ m/$base/gi;
        $stat{"num_$base"} += $hits;
    }

    # keep list of all lengths encountered
    push @len, length($sequence);
}
|
be48db09665c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff
changeset
|
100 |
|
1
53c14c29c2fd
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents:
0
diff
changeset
|
# N50/NG50 calculation sub

# calc_x50(\@lengths, $threshold)
#   Walks the list from its first element, summing the values, and stops at
#   the first element where the running total reaches $threshold.
#   Parameters:
#     \@lengths  - reference to an array of numbers; the callers in this
#                  script pass the sequence lengths sorted longest first
#     $threshold - cumulative total that has to be reached
#   Returns a two-element list: (value of that element, its 1-based
#   position), or (0, 0) when the total never reaches the threshold, which
#   includes the case of an empty list.
sub calc_x50 {
    my ($lengths, $thresh) = @_;

    my $cum = 0;
    # Read through the reference instead of copying the array, so the whole
    # list is not duplicated on every call.
    for my $i (0 .. $#{$lengths}) {
        $cum += $lengths->[$i];
        return ($lengths->[$i], $i + 1) if $cum >= $thresh;
    }
    return (0, 0);
}
|
53c14c29c2fd
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents:
0
diff
changeset
|
116 |
