view nanopolish_variants.pl @ 2:e4b6d4a53f2e draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
author jdv
date Tue, 03 Oct 2017 21:23:56 -0400
parents 2136c2725fc4
children a1d433401bc2
line wrap: on
line source

#!/usr/bin/env perl

use strict;
use warnings;
use 5.012;

use Archive::Tar;
use Cwd qw/getcwd abs_path/;
use File::Copy qw/copy/;
use Getopt::Long qw/:config pass_through/;
use threads;
use threads::shared;
use BioX::Seq::Stream;

my $fn_genome;
my $threads = 1;
my $fn_outfile;
my $fn_consensus;
my $fn_fast5;

# remember full command string (with proper binary)

# parse genome filename and add back to arg stack
GetOptions(
    'genome=s'    => \$fn_genome,
    'threads=i'   => \$threads,
    'outfile=s'   => \$fn_outfile,
    'consensus=s' => \$fn_consensus,
    'fast5=s'     => \$fn_fast5,
);

my $tmp_dir = 'tmp_dir';
mkdir $tmp_dir;

#extract FAST5 files to path where they are expected
my $in_dir = 'in_dir';
mkdir $in_dir;
my $cwd = abs_path( getcwd() );
chdir $in_dir;
my $tar = Archive::Tar->new();
$tar->read($fn_fast5);
$tar->extract();
chdir $cwd;

my @cmd = @ARGV;
unshift @cmd, 'nanopolish';
push @cmd, '--genome', $fn_genome;

say join ' ', @cmd;

my @regions :shared;

# build region tags to pass to nanopolish
my $parser = BioX::Seq::Stream->new($fn_genome);
while (my $seq = $parser->next_seq) {
    push @regions, join( ':', $seq->id,
        join( '-', 1, length($seq) ),
    );
}

my @workers;
for (1..$threads) {
    push @workers, threads->create(\&run);
}

$_->join() for (@workers);

my @fa_files  = glob "$tmp_dir/*.fasta";
my @out_files = glob "$tmp_dir/*.vcf";

open my $out_cons, '>', $fn_consensus
    or die "Failed to open output consensus: $!";
for (@fa_files) {
    open my $in, '<', $_;
    while (my $line = <$in>) {
        print {$out_cons} $line;
    }
    close $in;
}
close $out_cons;

# we may need to do extra processing on VCF output
open my $out_vcf, '>', $fn_outfile
    or die "Failed to open output file: $!";
for my $i (0..$#out_files) {
    my $v = $out_files[$i];
    open my $in, '<', $v;
    while (my $line = <$in>) {
        next if ($line =~ /^\s*#/ && $i > 0);
        print {$out_vcf} $line;
    }
    close $in;
}
close $out_vcf;


sub run {

    LOOP:
    while (1) {

        my $tag;

        {
            lock @regions;
            last LOOP if (! scalar @regions);
            $tag = shift @regions;
        }

        my $fn_out  = "$tmp_dir/$tag.vcf";
        my $fn_cons = "$tmp_dir/$tag.fasta";

        my @cmd_local = @cmd;
        push @cmd_local, '--window', $tag;
        push @cmd_local, '--outfile',   $fn_out;
        push @cmd_local, '--consensus', $fn_cons;

        my $ret = system @cmd_local;
        warn "RET: $ret ($!)\n";

        my $cmd_string = join ' ', @cmd_local;
        die "Non-zero exit value for command: $cmd_string\n"
            if ($ret);

    }

}