Mercurial > repos > jdv > albacore
comparison denoise.pl @ 1:0a4f83207e53 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit 4aa7a76a7b29c425dd89a020979e835d785d3c95-dirty
author | jdv |
---|---|
date | Wed, 06 Sep 2017 12:12:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:f8e25d69167d | 1:0a4f83207e53 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use 5.012; | |
6 | |
7 use File::Basename qw/basename/; | |
8 use File::Copy qw/copy/; | |
9 use Getopt::Long; | |
10 use List::Util qw/sum/; | |
11 | |
12 my $fn_table; | |
13 my @reads; | |
14 my @names; | |
15 my $min_score = 80; | |
16 my $min_frac = 0.05; | |
17 my $rm_unclass = 0; | |
18 my $n_keep; | |
19 my $fn_summary; | |
20 | |
21 use constant BARCODE => 14; | |
22 use constant SCORE => 15; | |
23 | |
24 my $unclass_tag = 'unclassified'; | |
25 | |
26 GetOptions( | |
27 'input=s' => \@reads, | |
28 'name=s' => \@names, | |
29 'table=s' => \$fn_table, | |
30 'min_score=f' => \$min_score, | |
31 'min_frac=f' => \$min_frac, | |
32 'remove_unclassified' => \$rm_unclass, | |
33 'n_keep=i' => \$n_keep, | |
34 'summary=s' => \$fn_summary, | |
35 ); | |
36 | |
37 die "Table not found\n" | |
38 if (! -r $fn_table); | |
39 | |
40 open my $tsv, '<', $fn_table; | |
41 my $head = <$tsv>; | |
42 chomp $head; | |
43 my @fields = split "\t", $head; | |
44 | |
45 die "unexpected field order" | |
46 if ($fields[BARCODE] ne 'barcode_arrangement'); | |
47 die "unexpected field order" | |
48 if ($fields[SCORE] ne 'barcode_score'); | |
49 | |
50 my %counts; | |
51 my %sums; | |
52 | |
53 while (my $line = <$tsv>) { | |
54 chomp $line; | |
55 my @fields = split "\t", $line; | |
56 my $bin = $fields[BARCODE]; | |
57 my $score = $fields[SCORE]; | |
58 $counts{$bin} += 1; | |
59 $sums{$bin} += $score; | |
60 } | |
61 | |
62 if ($rm_unclass) { | |
63 delete $counts{$unclass_tag}; | |
64 delete $sums{$unclass_tag}; | |
65 } | |
66 | |
67 my @keys = sort {$counts{$b} <=> $counts{$a}} keys %counts; | |
68 | |
69 my %scores; | |
70 | |
71 my %status; | |
72 | |
73 @status{ @keys } = ('discarded') x scalar(@keys); | |
74 | |
75 for (@keys) { | |
76 $scores{$_} = $sums{$_}/$counts{$_}; | |
77 } | |
78 my $sum_count = sum(values %counts); | |
79 my $mean_count = $sum_count / scalar(values %counts); | |
80 | |
81 if (defined $n_keep) { | |
82 | |
83 my %rank_scores; | |
84 @rank_scores{ @keys } = map {$scores{$_} * $counts{$_}/$sum_count} @keys; | |
85 | |
86 @keys = sort {$rank_scores{$b} <=> $rank_scores{$a}} keys %rank_scores; | |
87 | |
88 @keys = @keys[0..$n_keep-1]; | |
89 | |
90 } | |
91 | |
92 else { | |
93 | |
94 @keys = grep {$scores{$_} >= $min_score} @keys; | |
95 @keys = grep {$counts{$_} >= $mean_count*$min_frac} @keys; | |
96 | |
97 } | |
98 | |
99 @status{ @keys } = ('kept') x scalar(@keys); | |
100 | |
101 @keys = sort {$counts{$b} <=> $counts{$a}} keys %status; | |
102 | |
103 # print summary table | |
104 open my $summary, '>', $fn_summary | |
105 or die "Failed to open summary: $!\n"; | |
106 say {$summary} join "\t", | |
107 '#bin', | |
108 'n_reads', | |
109 'avg_score', | |
110 'status', | |
111 ; | |
112 for (@keys) { | |
113 say {$summary} join "\t", | |
114 $_, | |
115 $counts{$_}, | |
116 sprintf("%0.3f",$scores{$_}), | |
117 $status{$_}, | |
118 ; | |
119 } | |
120 close $summary; | |
121 | |
122 mkdir('outputs'); | |
123 | |
124 for my $i (0..$#reads) { | |
125 | |
126 my $fn = $reads[$i]; | |
127 my $name = $names[$i]; | |
128 | |
129 my $base = basename($name); | |
130 my $bin = $base; | |
131 $bin =~ s/\.(fastq|fq|fast5\.tar\.gz)$//i; | |
132 if (defined $status{$bin} && $status{$bin} eq 'kept') { | |
133 copy( $fn, "outputs/$base" ); | |
134 } | |
135 | |
136 } |