Mercurial > repos > elixir-it > covacs_intersect_snps
comparison 14_june_18_intersect.snps.with.infos.pl @ 0:3edc7bb490d3 draft default tip
Uploaded
| author | elixir-it |
|---|---|
| date | Thu, 08 Nov 2018 12:56:30 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3edc7bb490d3 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 # | |
| 3 | |
| 4 use strict; | |
| 5 | |
| 6 | |
| 7 my $f1=shift; #varscan | |
| 8 my $f2=shift; #freebayes | |
| 9 my $f3=shift; #GATK | |
| 10 my $fout=shift; | |
| 11 my %ALL=(); | |
| 12 my %L=(); | |
| 13 my %H=(); | |
| 14 | |
| 15 die("non trovo il file $f1") unless -e $f1; | |
| 16 die("non trovo il file $f2") unless -e $f2; | |
| 17 die("non trovo il file $f2") unless -e $f3; | |
| 18 | |
| 19 die ("non specificato il file di output") unless $fout; | |
| 20 | |
| 21 | |
| 22 open(OUT,">$fout.common.snps.vcf"); | |
| 23 open(UN,">$fout.unique.snps.vcf"); | |
| 24 | |
| 25 open(IN,$f1); | |
| 26 while(<IN>) | |
| 27 { | |
| 28 next if $_=~/^\#/; | |
| 29 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; | |
| 30 my @all_vl=(split(/\t/)); | |
| 31 next unless (length($b1)==length($b2)); | |
| 32 next if ($b1=~/\./ || $b1=~/\-/ || $b2=~/\./ || $b2=~/\-/); | |
| 33 my @b1=(split('',$b1)); | |
| 34 my @b2=(split('',$b2)); | |
| 35 for (my $i=0;$i<=$#b1;$i++) | |
| 36 { | |
| 37 my $lb1=$b1[$i]; | |
| 38 my $lb2=$b2[$i]; | |
| 39 next if $lb1 eq $lb2; | |
| 40 next if ($lb1=~/\./ || $lb1=~/\-/ || $lb2=~/\./ || $lb2=~/\-/ || $lb2=~/\,/); | |
| 41 my $lst=$st+$i; | |
| 42 $all_vl[1]=$lst; | |
| 43 $all_vl[3]=$lb1; | |
| 44 $all_vl[4]=$lb2; | |
| 45 my $J=join("\t",@all_vl); | |
| 46 $H{$chr}{$lst}{$lb1}{$lb2}++; | |
| 47 $ALL{$chr}{$lst}{$lb1}{$lb2}=$J; | |
| 48 push(@{$L{$chr}{$lst}{$lb1}{$lb2}},"VS"); | |
| 49 } | |
| 50 } | |
| 51 close(IN); | |
| 52 | |
| 53 open(IN,$f2); | |
| 54 while(<IN>) | |
| 55 { | |
| 56 next if $_=~/^\#/; | |
| 57 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; | |
| 58 my @all_vl=(split(/\t/)); | |
| 59 next unless (length($b1)==length($b2)); | |
| 60 next if ($b1=~/\./ || $b1=~/\-/ || $b2=~/\./ || $b2=~/\-/); | |
| 61 my @b1=(split('',$b1)); | |
| 62 my @b2=(split('',$b2)); | |
| 63 | |
| 64 for (my $i=0;$i<=$#b1;$i++) | |
| 65 { | |
| 66 my $lb1=$b1[$i]; | |
| 67 my $lb2=$b2[$i]; | |
| 68 next if $lb1 eq $lb2; | |
| 69 next if ($lb1=~/\./ || $lb1=~/\-/ || $lb2=~/\./ || $lb2=~/\-/ || $lb2=~/\,/); | |
| 70 my $lst=$st+$i; | |
| 71 $all_vl[1]=$lst; | |
| 72 $all_vl[3]=$lb1; | |
| 73 $all_vl[4]=$lb2; | |
| 74 my $J=join("\t",@all_vl); | |
| 75 $H{$chr}{$lst}{$lb1}{$lb2}++; | |
| 76 $ALL{$chr}{$lst}{$lb1}{$lb2}=$J; | |
| 77 push(@{$L{$chr}{$lst}{$lb1}{$lb2}},"FB"); | |
| 78 } | |
| 79 | |
| 80 } | |
| 81 close(IN); | |
| 82 | |
| 83 open(IN,$f3); | |
| 84 while(<IN>) | |
| 85 { | |
| 86 if ($_=~/^\#/) | |
| 87 { | |
| 88 print OUT if $_=~/fileformat/; | |
| 89 print OUT if $_=~/contig/; | |
| 90 print OUT if $_=~/CHROM/; | |
| 91 print UN if $_=~/fileformat/; | |
| 92 print UN if $_=~/contig/; | |
| 93 print UN if $_=~/CHROM/; | |
| 94 next; | |
| 95 } | |
| 96 my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; | |
| 97 my @all_vl=(split(/\t/)); | |
| 98 next unless (length($b1)==length($b2)); | |
| 99 next if ($b1=~/\./ || $b1=~/\-/ || $b2=~/\./ || $b2=~/\-/); | |
| 100 my @b1=(split('',$b1)); | |
| 101 my @b2=(split('',$b2)); | |
| 102 | |
| 103 for (my $i=0;$i<=$#b1;$i++) | |
| 104 { | |
| 105 my $lb1=$b1[$i]; | |
| 106 my $lb2=$b2[$i]; | |
| 107 next if $lb1 eq $lb2; | |
| 108 next if ($lb1=~/\./ || $lb1=~/\-/ || $lb2=~/\./ || $lb2=~/\-/ || $lb2=~/\,/); | |
| 109 my $lst=$st+$i; | |
| 110 $all_vl[1]=$lst; | |
| 111 $all_vl[3]=$lb1; | |
| 112 $all_vl[4]=$lb2; | |
| 113 my $J=join("\t",@all_vl); | |
| 114 $H{$chr}{$lst}{$lb1}{$lb2}++; | |
| 115 $ALL{$chr}{$lst}{$lb1}{$lb2}=$J; | |
| 116 push(@{$L{$chr}{$lst}{$lb1}{$lb2}},"GA"); | |
| 117 } | |
| 118 } | |
| 119 close(IN); | |
| 120 | |
| 121 foreach my $chr (sort keys %H) | |
| 122 { | |
| 123 foreach my $s (sort{$a<=>$b} keys %{$H{$chr}}) | |
| 124 { | |
| 125 foreach my $b1 (keys %{$H{$chr}{$s}}) | |
| 126 { | |
| 127 foreach my $b2 (keys %{$H{$chr}{$s}{$b1}}) | |
| 128 { | |
| 129 | |
| 130 my $vl=$H{$chr}{$s}{$b1}{$b2}; | |
| 131 unless ($H{$chr}{$s}{$b1}{$b2}) | |
| 132 { | |
| 133 warn("non trovo il numero di occorrenze di $chr $s $b1 $b2\n"); | |
| 134 next; | |
| 135 } | |
| 136 my $out_vl=$ALL{$chr}{$s}{$b1}{$b2}; | |
| 137 | |
| 138 unless ($ALL{$chr}{$s}{$b1}{$b2}) | |
| 139 { | |
| 140 warn("non trovo le info di $chr $s $b1 $b2\n"); | |
| 141 next; | |
| 142 } | |
| 143 my @vls=(split(/\s+/,$out_vl)); | |
| 144 my $comm=$vls[7]; | |
| 145 | |
| 146 if ($vl>=2) | |
| 147 { | |
| 148 my $mm=join("_",@{$L{$chr}{$s}{$b1}{$b2}}); | |
| 149 my $comm2=$comm; | |
| 150 $comm2.=";NM=$vl;LM=$mm"; | |
| 151 $vls[7]=$comm2; | |
| 152 my $out_vl=join("\t",@vls); | |
| 153 print OUT "$out_vl\n"; | |
| 154 | |
| 155 }else{ | |
| 156 my $mm=$L{$chr}{$s}{$b1}{$b2}[0]; | |
| 157 my $comm2=$comm; | |
| 158 $comm2.=";NM=$vl;LM=$mm"; | |
| 159 $vls[7]=$comm2; | |
| 160 my $out_vl=join("\t",@vls); | |
| 161 print UN "$out_vl\n"; | |
| 162 | |
| 163 } | |
| 164 } | |
| 165 } | |
| 166 } | |
| 167 } | |
| 168 | |
| 169 #foreach $V (keys %V) | |
| 170 #{ | |
| 171 # print "$V $V{$V}\n"; | |
| 172 #} |
