Mercurial > repos > elixir-it > vinyl_optimizer
comparison old_version/score_complete_alt.pl @ 2:6e4eb4856874 draft
Uploaded
| author | elixir-it |
|---|---|
| date | Wed, 22 Jul 2020 19:20:30 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:35c308dd6420 | 2:6e4eb4856874 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 #use strict; | |
| 3 | |
| 4 %arguments= | |
| 5 ( | |
| 6 "vcf"=>"", #file mandatory, provided at runtime | |
| 7 "disease"=>"", #name optional | |
| 8 "similarD"=>"", #file optional | |
| 9 "lgenes"=>"", #file optional | |
| 10 "leQTL"=>"qfile", #file mandatory, but default value | |
| 11 "keywords"=>"kfile", #file mandatory, but default value | |
| 12 "effects"=>"efile", #file mandatory, but default value | |
| 13 "disease_clinvar"=>8, #numeric mandadory, but default value | |
| 14 "score_AF"=>4, #numeric mandatory, but default value | |
| 15 "score_functional"=>8, #numeric mandatory, but default value | |
| 16 "score_NS"=>6, #numeric mandatory, but default value | |
| 17 "score_nIND"=>8, #numeric mandatory, but default value | |
| 18 "AF"=>0.0001, #numeric mandatory, but default value | |
| 19 "scoreeQTL"=>1, #numeric mandatory, but default value | |
| 20 "nind"=>5, #numeric mandatory, but default value | |
| 21 "scoreG"=>2, #numeric mandatory, but default value | |
| 22 #####OUTPUT file############################################# | |
| 23 "ofile"=>"final_res.csv", #file #OUTPUT #tabulare | |
| 24 "ovcfile"=>"final_res.vcf" #file #OUTOUT #vcf | |
| 25 ); | |
| 26 | |
| 27 | |
| 28 @arguments=@ARGV; | |
| 29 for ($i=0;$i<=$#ARGV;$i+=2) | |
| 30 { | |
| 31 $act=$ARGV[$i]; | |
| 32 $act=~s/-//g; | |
| 33 $val=$ARGV[$i+1]; | |
| 34 if (exists $arguments{$act}) | |
| 35 { | |
| 36 $arguments{$act}=$val; | |
| 37 }else{ | |
| 38 warn("$act: unknown argument\n"); | |
| 39 @valid=keys %arguments; | |
| 40 warn("Valid arguments are @valid\n"); | |
| 41 die("All those moments will be lost in time, like tears in rain.\n Time to die!\n"); | |
| 42 } | |
| 43 #print "$act $val\n"; | |
| 44 } | |
| 45 | |
| 46 $ofile_name=$arguments{"ofile"}; | |
| 47 open(O,">$ofile_name"); | |
| 48 $ovcfile=$arguments{"ovcfile"}; | |
| 49 open(OV,">$ovcfile"); | |
| 50 | |
| 51 | |
| 52 $file=$arguments{"vcf"} ; | |
| 53 die ("input file $file not found!\n") unless -e $file; | |
| 54 | |
| 55 $lgenes=$arguments{"lgenes"}; | |
| 56 if (-e $lgenes) | |
| 57 { | |
| 58 open(IN,$lgenes); | |
| 59 while(<IN>) | |
| 60 { | |
| 61 chomp; | |
| 62 $Lgenes{$_}=1; | |
| 63 } | |
| 64 } | |
| 65 | |
| 66 | |
| 67 $kfile=$arguments{"keywords"}; | |
| 68 die ("keyword file $kfile not found!\n") unless -e $kfile; | |
| 69 open(IN,$kfile); | |
| 70 while(<IN>) | |
| 71 { | |
| 72 chomp; | |
| 73 $specialKeys{$_}=1; | |
| 74 } | |
| 75 | |
| 76 | |
| 77 $diseaseO=$arguments{"disease"} ? $arguments{"disease"} : "GinocchioValgoDellaLavandaiaZoppa"; | |
| 78 | |
| 79 $sfile=$arguments{"similarD"}; | |
| 80 if (-e $sfile) | |
| 81 { | |
| 82 open(IN,$sfile); | |
| 83 while(<IN>) | |
| 84 { | |
| 85 chomp; | |
| 86 push(@kw,$_) | |
| 87 } | |
| 88 } | |
| 89 | |
| 90 $efile=$arguments{"effects"}; | |
| 91 die ("effect file $efile not found!\n") unless -e $efile; | |
| 92 open(IN,$efile); | |
| 93 while(<IN>) | |
| 94 { | |
| 95 chomp; | |
| 96 $effects{$_}=1; | |
| 97 } | |
| 98 | |
| 99 $leQTLfile=$arguments{"leQTL"}; | |
| 100 if (-e $leQTLfile) | |
| 101 { | |
| 102 open(IN,$leQTLfile); | |
| 103 while(<IN>) | |
| 104 { | |
| 105 chomp(); | |
| 106 $Qlist{$_}=1; | |
| 107 } | |
| 108 } | |
| 109 | |
| 110 $disease_clinvar=$arguments{"disease_clinvar"}; | |
| 111 $score_AF=$arguments{"score_AF"}; | |
| 112 $score_functional=$arguments{"score_functional"}; | |
| 113 $score_NS=$arguments{"score_NS"}; | |
| 114 $score_nIND=$arguments{"score_nIND"}; | |
| 115 $score_QTL=$arguments{"scoreeQTL"}; | |
| 116 $scoreG=$arguments{"scoreG"}; | |
| 117 | |
| 118 print O "CHR\tstart\tgene\tref\talt\tAC\t"; | |
| 119 foreach $k (sort keys %specialKeys) | |
| 120 { | |
| 121 print O "$k\t"; | |
| 122 } | |
| 123 print O "Score\n"; | |
| 124 | |
| 125 %damaged=(); | |
| 126 open(IN,$file); | |
| 127 | |
| 128 #print "$gene_score $disease_HGMD $disease_clinvar $score_functional $score_AF $score_NS $score_eQTL $score_nIND\n"; | |
| 129 while(<IN>) | |
| 130 { | |
| 131 if ($_=~/^#/) | |
| 132 { | |
| 133 print OV; | |
| 134 next; | |
| 135 } | |
| 136 chomp(); | |
| 137 @val=(split(/\t/)); | |
| 138 $chr=$val[0]; | |
| 139 $start=$val[1]; | |
| 140 $pstart=$val[2]; | |
| 141 $b1=$val[3]; | |
| 142 $b2=$val[4]; | |
| 143 $qscore=$val[5]; | |
| 144 $pb2=$val[6]; | |
| 145 $gene="na"; | |
| 146 $annot=$val[7]; | |
| 147 $gt=$val[8]; | |
| 148 @samples=@val[9..$#val]; | |
| 149 $samples=(join("\t",@samples)); | |
| 150 @terms=(split(/\;/,$annot)); | |
| 151 $DIS=0; | |
| 152 if ($_=~/;AC=(\d+);/) | |
| 153 { | |
| 154 $nind=$1; | |
| 155 } | |
| 156 if ($_=~/Gene.refGene=(\w+);/) | |
| 157 { | |
| 158 $gene=$1; | |
| 159 } | |
| 160 next if $nind==0; | |
| 161 $gene=(split(/\,/,$gene))[0]; | |
| 162 $i=0; | |
| 163 %keep=(); | |
| 164 $score=0; | |
| 165 $score+=$scoreG if $Lgenes{$gene}; | |
| 166 foreach $t (@terms) | |
| 167 { | |
| 168 ($keep,$value)=(split(/\=/,$t))[0,1]; | |
| 169 $keep{$keep}=$value; | |
| 170 } | |
| 171 | |
| 172 if ($keep{"CLNSIG"} ne "."){ | |
| 173 $scoreO=0; | |
| 174 $scoreC=0; | |
| 175 $add=0; | |
| 176 $add=$disease_clinvar if ($keep{"CLNSIG"} eq "Pathogenic" || $keep{"CLNSIG"} eq "Pathogenic,_other,_risk_factor" || $keep{"CLNSIG"} eq "pathogenic" || $keep{"CLNSIG"} eq "Pathogenic/Likely_pathogenic" ); | |
| 177 $add=$disease_clinvar/2 if ($keep{"CLNSIG"} eq "Likely_pathogenic" || $keep{"CLNSIG"} eq "Conflicting_interpretations_of_pathogenicity" || $keep{"CLNSIG"} eq "likely-pathogenic"); | |
| 178 $add=-$disease_clinvar/2 if ($keep{"CLNSIG"} eq "Likely_benign" || $keep{"CLNSIG"} eq "Benign/Likely_benign"); | |
| 179 $add=-$disease_clinvar if ($keep{"CLNSIG"} eq "Benign"); | |
| 180 @diseases=split(/\|/,$keep{"CLNDN"}); | |
| 181 @databases=split(/\|/,$keep{"CLNDISDB"}); | |
| 182 for ($i=0;$i<=$#diseases;$i++) | |
| 183 { | |
| 184 $dis=lc $diseases[$i]; | |
| 185 $dat=$databases[$i]; | |
| 186 if ($dis=~ /$diseaseO/) | |
| 187 { | |
| 188 if ($dat=~/OMIM/) | |
| 189 { | |
| 190 $scoreO=$add; | |
| 191 }else{ | |
| 192 $scoreC=$add; | |
| 193 } | |
| 194 last; | |
| 195 }else{ | |
| 196 foreach $kv (@kw) | |
| 197 { | |
| 198 if ($dis=~/$kv/) | |
| 199 { | |
| 200 $DIS=1 if $add>0; | |
| 201 if ($dat=~/OMIM/) | |
| 202 { | |
| 203 $scoreO=$add; | |
| 204 }else{ | |
| 205 $scoreC=$add; | |
| 206 } | |
| 207 } | |
| 208 } | |
| 209 } | |
| 210 } | |
| 211 $score+=$scoreO+$scoreC; | |
| 212 } | |
| 213 $esp=$keep{"esp6500siv2_ea"} eq "." ? 0 : $keep{"esp6500siv2_ea"}; | |
| 214 $g1000=$keep{"1000g2015aug_all"} eq "." ? 0 : $keep{"1000g2015aug_all"} ; | |
| 215 $exac=$keep{"ExAC_NFE"} eq "." ? 0 : $keep{"ExAC_NFE"}; | |
| 216 $gnomad=$keep{"gnomAD_exome_NFE"} eq "." ? 0 : $keep{"gnomAD_exome_NFE"}; | |
| 217 @AF=($esp,$g1000,$exac,$gnomad); | |
| 218 @AF=sort{$a<=>$b} @AF; | |
| 219 $AF=$AF[-1]; | |
| 220 # Variante molto rara 0.0001 2 punti | |
| 221 # Variante rara 0.004 1 punto | |
| 222 if ($AF<=$arguments{"AF"}) #0,00002 | |
| 223 { | |
| 224 $score+=$score_AF; | |
| 225 }elsif($AF>$arguments{"AF"} && $AF<=$arguments{"AF"}*4){ | |
| 226 $score+=$score_AF/2; | |
| 227 }elsif($AF>0.01){ #commonSNP | |
| 228 $score-=$score_AF/2; | |
| 229 } | |
| 230 | |
| 231 $effectO=(split(/\;/,$keep{"ExonicFunc.refGene"}))[0]; | |
| 232 if ($effects{$effectO}) | |
| 233 { | |
| 234 $damaged{$gene}{"D"}++; | |
| 235 $score+=$score_functional; | |
| 236 } | |
| 237 #print "Exon " . $keep{"ExonicFunc.refGene"}. " $score\n"; | |
| 238 if ($keep{"ExonicFunc.refGene"} eq "nonsynonymous_SNV"){ | |
| 239 | |
| 240 if ($keep{"MetaSVM_pred"} eq "D" && $keep{"CADD_raw"}>=5) | |
| 241 { | |
| 242 $score+=$score_NS; | |
| 243 $damaged{$gene}{"D"}++; | |
| 244 }elsif($keep{"MetaSVM_pred"} eq "D" && $keep{"CADD_raw"}<5){ | |
| 245 $score+=$score_NS/2; | |
| 246 }elsif($keep{"MetaSVM_pred"} ne "D" && $keep{"CADD_raw"}>=5){ | |
| 247 $score+=$score_NS/2; | |
| 248 } | |
| 249 } | |
| 250 #print "NS ". $keep{"ExonicFunc.refGene"} . " $score\n"; | |
| 251 $damaged{$gene}{"tot"}++; | |
| 252 if ($keep{"Func.refGene"} eq "splicing"){ | |
| 253 $score+=$score_functional; | |
| 254 } | |
| 255 #print "SPL ". $keep{"Func.refGene"} . " $score\n"; | |
| 256 foreach $QTL (keys %Qlist) | |
| 257 { | |
| 258 next unless $keep{$QTL}; | |
| 259 $score+=$score_QTL if ($keep{$QTL} ne "."); | |
| 260 } | |
| 261 if ($nind>=$arguments{"nind"} && $AF<=0.01) | |
| 262 { | |
| 263 $score+=$score_nIND; | |
| 264 }elsif($nind>=$arguments{"nind"}/2 && $nind<$arguments{"nind"} && $AF<=0.01){ | |
| 265 $score+=$score_nIND/2; | |
| 266 } | |
| 267 chomp(); | |
| 268 #$scores{$score}++; | |
| 269 chop($_); | |
| 270 $outL=""; | |
| 271 foreach $k (sort keys %specialKeys) | |
| 272 { | |
| 273 #die("$k\n") unless $keep{$k}; | |
| 274 $outL.="$keep{$k}\t"; | |
| 275 } | |
| 276 print O "$chr\t$start\t$gene\t$b1\t$b2\t$nind\t$outL$score\n"; #if $score>=12 || $DIS==1; | |
| 277 print OV "$chr\t$start\t$pstart\t$b1\t$b2\t$qscore\t$pb2\t$annot;COSO_score=$score\t$gt\t$samples\n"; | |
| 278 } | |
| 279 #open(O,">$file.gc.list.csv"); | |
| 280 #foreach $gene (sort{$a<=>$b} keys %G) | |
| 281 #{ | |
| 282 # $pos=$G{$gene}{"GR"} ? $G{$gene}{"GR"} : 0; | |
| 283 # $neg=$G{$gene}{"LW"} ? $G{$gene}{"LW"} : 0; | |
| 284 #print O "$gene $pos $neg\n"; | |
| 285 ##$Tscore+=$scores{$score} if $score>=$coff; | |
| 286 #} | |
| 287 | |
| 288 #open(D,">$file\_damaged.csv"); | |
| 289 #foreach $gene (sort{$a<=>$b} keys %damaged) | |
| 290 #{ | |
| 291 # $D=$damaged{$gene}{"D"} ? $damaged{$gene}{"D"} : 0; | |
| 292 # $T=$damaged{$gene}{"tot"}; | |
| 293 #print D "$gene $D $T\n"; | |
| 294 #} | |
| 295 | |
| 296 #print "$Tscore\n"; |
