Mercurial > repos > jdv > nanopolish
comparison nanopolish_variants.pl @ 11:550ac6458c07 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 3a5673ca0300ee8853958d854d9f433e9600944e
author | jdv |
---|---|
date | Fri, 09 Mar 2018 21:50:06 -0500 |
parents | c00a942cfc0b |
children |
comparison
equal
deleted
inserted
replaced
10:c00a942cfc0b | 11:550ac6458c07 |
---|---|
2 | 2 |
3 use strict; | 3 use strict; |
4 use warnings; | 4 use warnings; |
5 use 5.012; | 5 use 5.012; |
6 | 6 |
7 use autodie; | |
8 | |
9 use BioX::Seq::Stream; | |
7 use Cwd qw/getcwd abs_path/; | 10 use Cwd qw/getcwd abs_path/; |
8 use File::Copy qw/copy/; | 11 use File::Copy qw/copy/; |
12 use File::Temp qw/tempdir/; | |
9 use Getopt::Long qw/:config pass_through/; | 13 use Getopt::Long qw/:config pass_through/; |
10 use List::Util qw/min/; | 14 use List::Util qw/min/; |
11 use threads; | 15 use threads; |
12 use threads::shared; | 16 use threads::shared; |
13 use BioX::Seq::Stream; | |
14 | 17 |
15 my $fn_genome; | 18 my $fn_genome; |
16 my $threads = 1; | 19 my $threads = 1; |
17 my $fn_outfile; | 20 my $fn_outfile; |
18 my $fn_consensus; | 21 my $fn_consensus; |
19 my $fn_fast5; | 22 my $fn_fast5; |
20 my $fn_reads; | 23 my $fn_reads; |
21 my $fn_index; | 24 my $fn_index; |
25 my $fn_bam; | |
22 | 26 |
23 # remember full command string (with proper binary) | 27 # remember full command string (with proper binary) |
24 | 28 |
25 # parse genome filename and add back to arg stack | 29 # parse genome filename and add back to arg stack |
26 GetOptions( | 30 GetOptions( |
29 'outfile=s' => \$fn_outfile, | 33 'outfile=s' => \$fn_outfile, |
30 'consensus=s' => \$fn_consensus, | 34 'consensus=s' => \$fn_consensus, |
31 'fast5=s' => \$fn_fast5, | 35 'fast5=s' => \$fn_fast5, |
32 'reads=s' => \$fn_reads, | 36 'reads=s' => \$fn_reads, |
33 'index=s' => \$fn_index, | 37 'index=s' => \$fn_index, |
38 'bam=s' => \$fn_bam, | |
34 ); | 39 ); |
35 | 40 |
36 my $ret; | 41 my $ret; |
37 | 42 |
43 my $cwd = abs_path( getcwd() ); | |
44 | |
45 $fn_genome = abs_path( $fn_genome ) if ( defined $fn_genome ); | |
46 $fn_outfile = abs_path( $fn_outfile ) if ( defined $fn_outfile ); | |
47 $fn_consensus = abs_path( $fn_consensus ) if ( defined $fn_consensus ); | |
48 $fn_fast5 = abs_path( $fn_fast5 ) if ( defined $fn_fast5 ); | |
49 $fn_reads = abs_path( $fn_reads ) if ( defined $fn_reads ); | |
50 $fn_index = abs_path( $fn_index ) if ( defined $fn_index ); | |
51 | |
52 # BAM filename is already symbolic link, so abs_path() won't work properly. | |
53 # What we actually want are new symbolic links to the same targets | |
54 my $fn_bai = $fn_bam; | |
55 $fn_bai =~ s/\.bam$/.bai/ | |
56 or die "Failed to replace extension of BAM index file"; | |
57 my $bam_tgt = readlink $fn_bam; | |
58 my $bai_tgt = readlink $fn_bai; | |
59 | |
60 my $tmpdir = tempdir( CLEANUP => 1 ); | |
61 | |
62 chdir $tmpdir; | |
63 mkdir 'tmp'; | |
64 symlink $bam_tgt, 'input.bam'; | |
65 symlink $bai_tgt, 'input.bai'; | |
66 | |
38 my $fn_link = 'reads'; | 67 my $fn_link = 'reads'; |
39 my $tmp_dir = 'tmp_dir'; | |
40 mkdir $tmp_dir; | |
41 | 68 |
42 # divide available threads between actual threads and regions | 69 # divide available threads between actual threads and regions |
43 # | 70 # |
44 # testing suggests minimal speed-up past 4-8 actual threads per region, so use | 71 # testing suggests minimal speed-up past 4-8 actual threads per region, so use |
45 # remaining threads for running parallel regions | 72 # remaining threads for running parallel regions |
46 my $n_threads = min( 4, $threads ); | 73 my $n_threads = min( 4, $threads ); |
47 my $n_workers = int($threads/$n_threads); | 74 my $n_workers = int($threads/$n_threads); |
48 | 75 |
49 | 76 |
50 $fn_fast5 = abs_path($fn_fast5); | |
51 | |
52 # extract FAST5 files to path where they are expected | 77 # extract FAST5 files to path where they are expected |
53 # use system 'tar' to transparently and safely handle absolute paths | 78 # use system 'tar' to transparently and safely handle absolute paths |
54 my $fast5_dir = 'fast5'; | 79 my $fast5_dir = 'fast5'; |
55 mkdir $fast5_dir; | 80 mkdir $fast5_dir; |
56 my $cwd = abs_path( getcwd() ); | |
57 chdir $fast5_dir; | 81 chdir $fast5_dir; |
58 $ret = system( | 82 $ret = system( |
59 'tar', | 83 'tar', |
60 '-xf', | 84 '-xf', |
61 $fn_fast5 | 85 $fn_fast5 |
62 ); | 86 ); |
63 die "Failed to extract tarball: $!\n" | 87 die "Failed to extract tarball: $!\n" |
64 if ($ret); | 88 if ($ret); |
65 chdir $cwd; | 89 chdir $tmpdir; |
66 | 90 |
67 symlink( $fn_reads, $fn_link ) | 91 symlink( $fn_reads, $fn_link ) |
68 or die "Failed to create symlink"; | 92 or die "Failed to create symlink"; |
69 | 93 |
70 | 94 |
92 my @cmd = @ARGV; | 116 my @cmd = @ARGV; |
93 unshift @cmd, 'nanopolish'; | 117 unshift @cmd, 'nanopolish'; |
94 push @cmd, '--genome', $fn_genome; | 118 push @cmd, '--genome', $fn_genome; |
95 push @cmd, '--reads', $fn_link; | 119 push @cmd, '--reads', $fn_link; |
96 push @cmd, '--threads', $n_threads; | 120 push @cmd, '--threads', $n_threads; |
121 push @cmd, '--bam', 'input.bam'; | |
97 | 122 |
98 my @regions :shared; | 123 my @regions :shared; |
99 | 124 |
100 # build region tags to pass to nanopolish | 125 # build region tags to pass to nanopolish |
101 if (-s $fn_genome) { # gracefully handle empty inputs | 126 if (-s $fn_genome) { # gracefully handle empty inputs |
112 push @workers, threads->create(\&run); | 137 push @workers, threads->create(\&run); |
113 } | 138 } |
114 | 139 |
115 $_->join() for (@workers); | 140 $_->join() for (@workers); |
116 | 141 |
117 my @fa_files = glob "$tmp_dir/*.fasta"; | 142 my @fa_files = glob "tmp/*.fasta"; |
118 my @out_files = glob "$tmp_dir/*.vcf"; | 143 my @out_files = glob "tmp/*.vcf"; |
119 | 144 |
120 open my $out_cons, '>', $fn_consensus | 145 open my $out_cons, '>', $fn_consensus |
121 or die "Failed to open output consensus: $!"; | 146 or die "Failed to open output consensus: $!"; |
122 for (@fa_files) { | 147 for (@fa_files) { |
123 my $parser = BioX::Seq::Stream->new($_); | 148 my $parser = BioX::Seq::Stream->new($_); |
154 lock @regions; | 179 lock @regions; |
155 last LOOP if (! scalar @regions); | 180 last LOOP if (! scalar @regions); |
156 $tag = shift @regions; | 181 $tag = shift @regions; |
157 } | 182 } |
158 | 183 |
159 my $fn_out = "$tmp_dir/$tag.vcf"; | 184 my $fn_out = "tmp/$tag.vcf"; |
160 my $fn_cons = "$tmp_dir/$tag.fasta"; | 185 my $fn_cons = "tmp/$tag.fasta"; |
161 | 186 |
162 my @cmd_local = @cmd; | 187 my @cmd_local = @cmd; |
163 push @cmd_local, '--window', $tag; | 188 push @cmd_local, '--window', $tag; |
164 push @cmd_local, '--outfile', $fn_out; | 189 push @cmd_local, '--outfile', $fn_out; |
165 push @cmd_local, '--consensus', $fn_cons; | 190 push @cmd_local, '--consensus', $fn_cons; |