comparison nanopolish_variants.pl @ 0:2136c2725fc4 draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
author jdv
date Wed, 06 Sep 2017 12:15:45 -0400
parents
children a1d433401bc2
comparison
equal deleted inserted replaced
-1:000000000000 0:2136c2725fc4
1 #!/usr/bin/env perl
2
3 use strict;
4 use warnings;
5 use 5.012;
6
7 use Archive::Tar;
8 use Cwd qw/getcwd abs_path/;
9 use File::Copy qw/copy/;
10 use Getopt::Long qw/:config pass_through/;
11 use threads;
12 use threads::shared;
13 use BioX::Seq::Stream;
14
15 my $fn_genome;
16 my $threads = 1;
17 my $fn_outfile;
18 my $fn_consensus;
19 my $fn_fast5;
20
21 # remember full command string (with proper binary)
22
23 # parse genome filename and add back to arg stack
24 GetOptions(
25 'genome=s' => \$fn_genome,
26 'threads=i' => \$threads,
27 'outfile=s' => \$fn_outfile,
28 'consensus=s' => \$fn_consensus,
29 'fast5=s' => \$fn_fast5,
30 );
31
32 my $tmp_dir = 'tmp_dir';
33 mkdir $tmp_dir;
34
35 #extract FAST5 files to path where they are expected
36 my $in_dir = 'in_dir';
37 mkdir $in_dir;
38 my $cwd = abs_path( getcwd() );
39 chdir $in_dir;
40 my $tar = Archive::Tar->new();
41 $tar->read($fn_fast5);
42 $tar->extract();
43 chdir $cwd;
44
45 my @cmd = @ARGV;
46 unshift @cmd, 'nanopolish';
47 push @cmd, '--genome', $fn_genome;
48
49 say join ' ', @cmd;
50
51 my @regions :shared;
52
53 # build region tags to pass to nanopolish
54 my $parser = BioX::Seq::Stream->new($fn_genome);
55 while (my $seq = $parser->next_seq) {
56 push @regions, join( ':', $seq->id,
57 join( '-', 1, length($seq) ),
58 );
59 }
60
61 my @workers;
62 for (1..$threads) {
63 push @workers, threads->create(\&run);
64 }
65
66 $_->join() for (@workers);
67
68 my @fa_files = glob "$tmp_dir/*.fasta";
69 my @out_files = glob "$tmp_dir/*.vcf";
70
71 open my $out_cons, '>', $fn_consensus
72 or die "Failed to open output consensus: $!";
73 for (@fa_files) {
74 open my $in, '<', $_;
75 while (my $line = <$in>) {
76 print {$out_cons} $line;
77 }
78 close $in;
79 }
80 close $out_cons;
81
82 # we may need to do extra processing on VCF output
83 open my $out_vcf, '>', $fn_outfile
84 or die "Failed to open output file: $!";
85 for my $i (0..$#out_files) {
86 my $v = $out_files[$i];
87 open my $in, '<', $v;
88 while (my $line = <$in>) {
89 next if ($line =~ /^\s*#/ && $i > 0);
90 print {$out_vcf} $line;
91 }
92 close $in;
93 }
94 close $out_vcf;
95
96
97 sub run {
98
99 LOOP:
100 while (1) {
101
102 my $tag;
103
104 {
105 lock @regions;
106 last LOOP if (! scalar @regions);
107 $tag = shift @regions;
108 }
109
110 my $fn_out = "$tmp_dir/$tag.vcf";
111 my $fn_cons = "$tmp_dir/$tag.fasta";
112
113 my @cmd_local = @cmd;
114 push @cmd_local, '--window', $tag;
115 push @cmd_local, '--outfile', $fn_out;
116 push @cmd_local, '--consensus', $fn_cons;
117
118 my $ret = system @cmd_local;
119 warn "RET: $ret ($!)\n";
120
121 my $cmd_string = join ' ', @cmd_local;
122 die "Non-zero exit value for command: $cmd_string\n"
123 if ($ret);
124
125 }
126
127 }