Mercurial > repos > elixir-it > covacs_intersect_indels
comparison 14_june_18_intersect.indels.with.infos.pl @ 0:482e911975a1 draft default tip
Uploaded
| author | elixir-it |
|---|---|
| date | Thu, 08 Nov 2018 12:54:29 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:482e911975a1 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 # | |
| 3 use strict; | |
| 4 | |
| 5 my $f1=shift; #vcf di varscan | |
| 6 my $f2=shift; #vcf di freebayes | |
| 7 my $f3=shift; #vcf di GATK | |
| 8 my $fout=shift; | |
| 9 | |
| 10 die("non trovo il file $f1") unless -e $f1; | |
| 11 die("non trovo il file $f2") unless -e $f2; | |
| 12 die("non trovo il file $f2") unless -e $f3; | |
| 13 | |
| 14 die ("non specificato il file di output") unless $fout; | |
| 15 | |
| 16 my %ALL=(); | |
| 17 my %L=(); | |
| 18 my %H=(); | |
| 19 | |
| 20 open(OUT,">$fout.common.indels.vcf"); | |
| 21 open(UN,">$fout.unique.indels.vcf"); | |
| 22 | |
| 23 open(IN,$f1); | |
| 24 while(<IN>) | |
| 25 { | |
| 26 next if $_=~/^\#/; | |
| 27 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; | |
| 28 my $sum=length($b1)+length($b2); | |
| 29 next if $sum<=2; | |
| 30 next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) ); | |
| 31 $H{$chr}{$st}++; | |
| 32 $ALL{$chr}{$st}=$_; | |
| 33 push(@{$L{$chr}{$st}},"VS"); | |
| 34 } | |
| 35 close(IN); | |
| 36 | |
| 37 open(IN,$f2); | |
| 38 while(<IN>) | |
| 39 { | |
| 40 next if $_=~/^\#/; | |
| 41 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; | |
| 42 my $sum=length($b1)+length($b2); | |
| 43 next if $sum<=2; | |
| 44 next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) ); | |
| 45 $H{$chr}{$st}++; | |
| 46 $ALL{$chr}{$st}=$_; | |
| 47 push(@{$L{$chr}{$st}},"FB"); | |
| 48 } | |
| 49 close(IN); | |
| 50 | |
| 51 open(IN,$f3); | |
| 52 while(<IN>) | |
| 53 { | |
| 54 if ($_=~/^\#/) | |
| 55 { | |
| 56 print OUT if $_=~/fileformat/; | |
| 57 print OUT if $_=~/contig/; | |
| 58 print OUT if $_=~/CHROM/; | |
| 59 print UN if $_=~/fileformat/; | |
| 60 print UN if $_=~/contig/; | |
| 61 print UN if $_=~/CHROM/; | |
| 62 next; | |
| 63 } | |
| 64 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; | |
| 65 my $sum=length($b1)+length($b2); | |
| 66 next if $sum<=2; | |
| 67 next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) ); | |
| 68 $H{$chr}{$st}++; | |
| 69 $ALL{$chr}{$st}=$_; | |
| 70 push(@{$L{$chr}{$st}},"GA"); | |
| 71 } | |
| 72 close(IN); | |
| 73 | |
| 74 | |
| 75 foreach my $chr (sort keys %H) | |
| 76 { | |
| 77 foreach my $s (sort{$a<=>$b} keys %{$H{$chr}}) | |
| 78 { | |
| 79 my $vl=$H{$chr}{$s}; | |
| 80 unless ($vl) | |
| 81 { | |
| 82 warn("non so contare le occorrenze di $chr $s\n"); | |
| 83 next; | |
| 84 } | |
| 85 my $out_vl=$ALL{$chr}{$s}; | |
| 86 unless ($out_vl) | |
| 87 { | |
| 88 warn("non so trovare le info di $chr $s\n"); | |
| 89 next; | |
| 90 } | |
| 91 my @outs=(split(/\t/,$out_vl)); | |
| 92 my $comm=$outs[7]; | |
| 93 if ($vl>=2) | |
| 94 { | |
| 95 my $mm=$L{$chr}{$s} ? join("_",@{$L{$chr}{$s}}) : "NA"; | |
| 96 my $comm2=$comm; | |
| 97 $comm2.=";NM=$vl;LM=$mm"; | |
| 98 $outs[7]=$comm2; | |
| 99 $out_vl=join("\t",@outs); | |
| 100 print OUT $out_vl; | |
| 101 }else{ | |
| 102 my $mm=$L{$chr}{$s} ? $L{$chr}{$s}[0] : "NA"; | |
| 103 my $comm2=$comm; | |
| 104 $comm2.=";NM=$vl;LM=$mm"; | |
| 105 $outs[7]=$comm2; | |
| 106 $out_vl=join("\t",@outs); | |
| 107 print UN $out_vl; | |
| 108 } | |
| 109 } | |
| 110 } |
