Mercurial > repos > jdv > nanopolish
diff nanopolish_variants.pl @ 0:2136c2725fc4 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
author | jdv |
---|---|
date | Wed, 06 Sep 2017 12:15:45 -0400 |
parents | |
children | a1d433401bc2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nanopolish_variants.pl Wed Sep 06 12:15:45 2017 -0400 @@ -0,0 +1,127 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use 5.012; + +use Archive::Tar; +use Cwd qw/getcwd abs_path/; +use File::Copy qw/copy/; +use Getopt::Long qw/:config pass_through/; +use threads; +use threads::shared; +use BioX::Seq::Stream; + +my $fn_genome; +my $threads = 1; +my $fn_outfile; +my $fn_consensus; +my $fn_fast5; + +# remember full command string (with proper binary) + +# parse genome filename and add back to arg stack +GetOptions( + 'genome=s' => \$fn_genome, + 'threads=i' => \$threads, + 'outfile=s' => \$fn_outfile, + 'consensus=s' => \$fn_consensus, + 'fast5=s' => \$fn_fast5, +); + +my $tmp_dir = 'tmp_dir'; +mkdir $tmp_dir; + +#extract FAST5 files to path where they are expected +my $in_dir = 'in_dir'; +mkdir $in_dir; +my $cwd = abs_path( getcwd() ); +chdir $in_dir; +my $tar = Archive::Tar->new(); +$tar->read($fn_fast5); +$tar->extract(); +chdir $cwd; + +my @cmd = @ARGV; +unshift @cmd, 'nanopolish'; +push @cmd, '--genome', $fn_genome; + +say join ' ', @cmd; + +my @regions :shared; + +# build region tags to pass to nanopolish +my $parser = BioX::Seq::Stream->new($fn_genome); +while (my $seq = $parser->next_seq) { + push @regions, join( ':', $seq->id, + join( '-', 1, length($seq) ), + ); +} + +my @workers; +for (1..$threads) { + push @workers, threads->create(\&run); +} + +$_->join() for (@workers); + +my @fa_files = glob "$tmp_dir/*.fasta"; +my @out_files = glob "$tmp_dir/*.vcf"; + +open my $out_cons, '>', $fn_consensus + or die "Failed to open output consensus: $!"; +for (@fa_files) { + open my $in, '<', $_; + while (my $line = <$in>) { + print {$out_cons} $line; + } + close $in; +} +close $out_cons; + +# we may need to do extra processing on VCF output +open my $out_vcf, '>', $fn_outfile + or die "Failed to open output file: $!"; +for my $i (0..$#out_files) { + my $v = $out_files[$i]; + open my $in, '<', $v; + while (my $line = <$in>) { + next if ($line =~ /^\s*#/ && $i > 0); + print {$out_vcf} $line; + } + close $in; +} +close $out_vcf; + + +sub run { + + LOOP: + while (1) { + + my $tag; + + { + lock @regions; + last LOOP if (! scalar @regions); + $tag = shift @regions; + } + + my $fn_out = "$tmp_dir/$tag.vcf"; + my $fn_cons = "$tmp_dir/$tag.fasta"; + + my @cmd_local = @cmd; + push @cmd_local, '--window', $tag; + push @cmd_local, '--outfile', $fn_out; + push @cmd_local, '--consensus', $fn_cons; + + my $ret = system @cmd_local; + warn "RET: $ret ($!)\n"; + + my $cmd_string = join ' ', @cmd_local; + die "Non-zero exit value for command: $cmd_string\n" + if ($ret); + + } + +}