annotate 14_june_18_intersect.indels.with.infos.pl @ 0:482e911975a1 draft default tip

Uploaded
author elixir-it
date Thu, 08 Nov 2018 12:54:29 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
1 #!/usr/bin/perl -w
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
2 #
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
3 use strict;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
4
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
5 my $f1=shift; #vcf di varscan
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
6 my $f2=shift; #vcf di freebayes
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
7 my $f3=shift; #vcf di GATK
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
8 my $fout=shift;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
9
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
10 die("non trovo il file $f1") unless -e $f1;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
11 die("non trovo il file $f2") unless -e $f2;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
12 die("non trovo il file $f2") unless -e $f3;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
13
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
14 die ("non specificato il file di output") unless $fout;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
15
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
16 my %ALL=();
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
17 my %L=();
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
18 my %H=();
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
19
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
20 open(OUT,">$fout.common.indels.vcf");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
21 open(UN,">$fout.unique.indels.vcf");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
22
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
23 open(IN,$f1);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
24 while(<IN>)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
25 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
26 next if $_=~/^\#/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
27 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4];
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
28 my $sum=length($b1)+length($b2);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
29 next if $sum<=2;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
30 next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) );
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
31 $H{$chr}{$st}++;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
32 $ALL{$chr}{$st}=$_;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
33 push(@{$L{$chr}{$st}},"VS");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
34 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
35 close(IN);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
36
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
37 open(IN,$f2);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
38 while(<IN>)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
39 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
40 next if $_=~/^\#/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
41 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4];
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
42 my $sum=length($b1)+length($b2);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
43 next if $sum<=2;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
44 next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) );
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
45 $H{$chr}{$st}++;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
46 $ALL{$chr}{$st}=$_;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
47 push(@{$L{$chr}{$st}},"FB");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
48 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
49 close(IN);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
50
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
51 open(IN,$f3);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
52 while(<IN>)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
53 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
54 if ($_=~/^\#/)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
55 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
56 print OUT if $_=~/fileformat/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
57 print OUT if $_=~/contig/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
58 print OUT if $_=~/CHROM/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
59 print UN if $_=~/fileformat/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
60 print UN if $_=~/contig/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
61 print UN if $_=~/CHROM/;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
62 next;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
63 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
64 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4];
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
65 my $sum=length($b1)+length($b2);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
66 next if $sum<=2;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
67 next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) );
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
68 $H{$chr}{$st}++;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
69 $ALL{$chr}{$st}=$_;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
70 push(@{$L{$chr}{$st}},"GA");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
71 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
72 close(IN);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
73
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
74
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
75 foreach my $chr (sort keys %H)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
76 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
77 foreach my $s (sort{$a<=>$b} keys %{$H{$chr}})
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
78 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
79 my $vl=$H{$chr}{$s};
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
80 unless ($vl)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
81 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
82 warn("non so contare le occorrenze di $chr $s\n");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
83 next;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
84 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
85 my $out_vl=$ALL{$chr}{$s};
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
86 unless ($out_vl)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
87 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
88 warn("non so trovare le info di $chr $s\n");
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
89 next;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
90 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
91 my @outs=(split(/\t/,$out_vl));
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
92 my $comm=$outs[7];
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
93 if ($vl>=2)
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
94 {
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
95 my $mm=$L{$chr}{$s} ? join("_",@{$L{$chr}{$s}}) : "NA";
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
96 my $comm2=$comm;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
97 $comm2.=";NM=$vl;LM=$mm";
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
98 $outs[7]=$comm2;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
99 $out_vl=join("\t",@outs);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
100 print OUT $out_vl;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
101 }else{
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
102 my $mm=$L{$chr}{$s} ? $L{$chr}{$s}[0] : "NA";
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
103 my $comm2=$comm;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
104 $comm2.=";NM=$vl;LM=$mm";
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
105 $outs[7]=$comm2;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
106 $out_vl=join("\t",@outs);
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
107 print UN $out_vl;
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
108 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
109 }
482e911975a1 Uploaded
elixir-it
parents:
diff changeset
110 }