Mercurial > repos > elixir-it > vinyl_optimizer
diff old_version/score_complete_alt.pl @ 2:6e4eb4856874 draft
Uploaded
| author | elixir-it |
|---|---|
| date | Wed, 22 Jul 2020 19:20:30 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/old_version/score_complete_alt.pl Wed Jul 22 19:20:30 2020 +0000 @@ -0,0 +1,296 @@ +#!/usr/bin/perl -w +#use strict; + +%arguments= +( +"vcf"=>"", #file mandatory, provided at runtime +"disease"=>"", #name optional +"similarD"=>"", #file optional +"lgenes"=>"", #file optional +"leQTL"=>"qfile", #file mandatory, but default value +"keywords"=>"kfile", #file mandatory, but default value +"effects"=>"efile", #file mandatory, but default value +"disease_clinvar"=>8, #numeric mandadory, but default value +"score_AF"=>4, #numeric mandatory, but default value +"score_functional"=>8, #numeric mandatory, but default value +"score_NS"=>6, #numeric mandatory, but default value +"score_nIND"=>8, #numeric mandatory, but default value +"AF"=>0.0001, #numeric mandatory, but default value +"scoreeQTL"=>1, #numeric mandatory, but default value +"nind"=>5, #numeric mandatory, but default value +"scoreG"=>2, #numeric mandatory, but default value +#####OUTPUT file############################################# +"ofile"=>"final_res.csv", #file #OUTPUT #tabulare +"ovcfile"=>"final_res.vcf" #file #OUTOUT #vcf +); + + +@arguments=@ARGV; +for ($i=0;$i<=$#ARGV;$i+=2) +{ + $act=$ARGV[$i]; + $act=~s/-//g; + $val=$ARGV[$i+1]; + if (exists $arguments{$act}) + { + $arguments{$act}=$val; + }else{ + warn("$act: unknown argument\n"); + @valid=keys %arguments; + warn("Valid arguments are @valid\n"); + die("All those moments will be lost in time, like tears in rain.\n Time to die!\n"); + } + #print "$act $val\n"; +} + +$ofile_name=$arguments{"ofile"}; +open(O,">$ofile_name"); +$ovcfile=$arguments{"ovcfile"}; +open(OV,">$ovcfile"); + + +$file=$arguments{"vcf"} ; +die ("input file $file not found!\n") unless -e $file; + +$lgenes=$arguments{"lgenes"}; +if (-e $lgenes) +{ + open(IN,$lgenes); + while(<IN>) + { + chomp; + $Lgenes{$_}=1; + } +} + + +$kfile=$arguments{"keywords"}; +die ("keyword file $kfile not found!\n") unless -e $kfile; +open(IN,$kfile); +while(<IN>) +{ + chomp; + $specialKeys{$_}=1; +} + + +$diseaseO=$arguments{"disease"} ? $arguments{"disease"} : "GinocchioValgoDellaLavandaiaZoppa"; + +$sfile=$arguments{"similarD"}; +if (-e $sfile) +{ + open(IN,$sfile); + while(<IN>) + { + chomp; + push(@kw,$_) + } +} + +$efile=$arguments{"effects"}; +die ("effect file $efile not found!\n") unless -e $efile; +open(IN,$efile); +while(<IN>) +{ + chomp; + $effects{$_}=1; +} + +$leQTLfile=$arguments{"leQTL"}; +if (-e $leQTLfile) +{ + open(IN,$leQTLfile); + while(<IN>) + { + chomp(); + $Qlist{$_}=1; + } +} + +$disease_clinvar=$arguments{"disease_clinvar"}; +$score_AF=$arguments{"score_AF"}; +$score_functional=$arguments{"score_functional"}; +$score_NS=$arguments{"score_NS"}; +$score_nIND=$arguments{"score_nIND"}; +$score_QTL=$arguments{"scoreeQTL"}; +$scoreG=$arguments{"scoreG"}; + +print O "CHR\tstart\tgene\tref\talt\tAC\t"; +foreach $k (sort keys %specialKeys) +{ + print O "$k\t"; +} +print O "Score\n"; + +%damaged=(); +open(IN,$file); + +#print "$gene_score $disease_HGMD $disease_clinvar $score_functional $score_AF $score_NS $score_eQTL $score_nIND\n"; +while(<IN>) +{ + if ($_=~/^#/) + { + print OV; + next; + } + chomp(); + @val=(split(/\t/)); + $chr=$val[0]; + $start=$val[1]; + $pstart=$val[2]; + $b1=$val[3]; + $b2=$val[4]; + $qscore=$val[5]; + $pb2=$val[6]; + $gene="na"; + $annot=$val[7]; + $gt=$val[8]; + @samples=@val[9..$#val]; + $samples=(join("\t",@samples)); + @terms=(split(/\;/,$annot)); + $DIS=0; + if ($_=~/;AC=(\d+);/) + { + $nind=$1; + } + if ($_=~/Gene.refGene=(\w+);/) + { + $gene=$1; + } + next if $nind==0; + $gene=(split(/\,/,$gene))[0]; + $i=0; + %keep=(); + $score=0; + $score+=$scoreG if $Lgenes{$gene}; + foreach $t (@terms) + { + ($keep,$value)=(split(/\=/,$t))[0,1]; + $keep{$keep}=$value; + } + + if ($keep{"CLNSIG"} ne "."){ + $scoreO=0; + $scoreC=0; + $add=0; + $add=$disease_clinvar if ($keep{"CLNSIG"} eq "Pathogenic" || $keep{"CLNSIG"} eq "Pathogenic,_other,_risk_factor" || $keep{"CLNSIG"} eq "pathogenic" || $keep{"CLNSIG"} eq "Pathogenic/Likely_pathogenic" ); + $add=$disease_clinvar/2 if ($keep{"CLNSIG"} eq "Likely_pathogenic" || $keep{"CLNSIG"} eq "Conflicting_interpretations_of_pathogenicity" || $keep{"CLNSIG"} eq "likely-pathogenic"); + $add=-$disease_clinvar/2 if ($keep{"CLNSIG"} eq "Likely_benign" || $keep{"CLNSIG"} eq "Benign/Likely_benign"); + $add=-$disease_clinvar if ($keep{"CLNSIG"} eq "Benign"); + @diseases=split(/\|/,$keep{"CLNDN"}); + @databases=split(/\|/,$keep{"CLNDISDB"}); + for ($i=0;$i<=$#diseases;$i++) + { + $dis=lc $diseases[$i]; + $dat=$databases[$i]; + if ($dis=~ /$diseaseO/) + { + if ($dat=~/OMIM/) + { + $scoreO=$add; + }else{ + $scoreC=$add; + } + last; + }else{ + foreach $kv (@kw) + { + if ($dis=~/$kv/) + { + $DIS=1 if $add>0; + if ($dat=~/OMIM/) + { + $scoreO=$add; + }else{ + $scoreC=$add; + } + } + } + } + } + $score+=$scoreO+$scoreC; + } + $esp=$keep{"esp6500siv2_ea"} eq "." ? 0 : $keep{"esp6500siv2_ea"}; + $g1000=$keep{"1000g2015aug_all"} eq "." ? 0 : $keep{"1000g2015aug_all"} ; + $exac=$keep{"ExAC_NFE"} eq "." ? 0 : $keep{"ExAC_NFE"}; + $gnomad=$keep{"gnomAD_exome_NFE"} eq "." ? 0 : $keep{"gnomAD_exome_NFE"}; + @AF=($esp,$g1000,$exac,$gnomad); + @AF=sort{$a<=>$b} @AF; + $AF=$AF[-1]; + # Variante molto rara 0.0001 2 punti + # Variante rara 0.004 1 punto + if ($AF<=$arguments{"AF"}) #0,00002 + { + $score+=$score_AF; + }elsif($AF>$arguments{"AF"} && $AF<=$arguments{"AF"}*4){ + $score+=$score_AF/2; + }elsif($AF>0.01){ #commonSNP + $score-=$score_AF/2; + } + + $effectO=(split(/\;/,$keep{"ExonicFunc.refGene"}))[0]; + if ($effects{$effectO}) + { + $damaged{$gene}{"D"}++; + $score+=$score_functional; + } + #print "Exon " . $keep{"ExonicFunc.refGene"}. " $score\n"; + if ($keep{"ExonicFunc.refGene"} eq "nonsynonymous_SNV"){ + + if ($keep{"MetaSVM_pred"} eq "D" && $keep{"CADD_raw"}>=5) + { + $score+=$score_NS; + $damaged{$gene}{"D"}++; + }elsif($keep{"MetaSVM_pred"} eq "D" && $keep{"CADD_raw"}<5){ + $score+=$score_NS/2; + }elsif($keep{"MetaSVM_pred"} ne "D" && $keep{"CADD_raw"}>=5){ + $score+=$score_NS/2; + } + } + #print "NS ". $keep{"ExonicFunc.refGene"} . " $score\n"; + $damaged{$gene}{"tot"}++; + if ($keep{"Func.refGene"} eq "splicing"){ + $score+=$score_functional; + } + #print "SPL ". $keep{"Func.refGene"} . " $score\n"; + foreach $QTL (keys %Qlist) + { + next unless $keep{$QTL}; + $score+=$score_QTL if ($keep{$QTL} ne "."); + } + if ($nind>=$arguments{"nind"} && $AF<=0.01) + { + $score+=$score_nIND; + }elsif($nind>=$arguments{"nind"}/2 && $nind<$arguments{"nind"} && $AF<=0.01){ + $score+=$score_nIND/2; + } + chomp(); + #$scores{$score}++; + chop($_); + $outL=""; + foreach $k (sort keys %specialKeys) + { + #die("$k\n") unless $keep{$k}; + $outL.="$keep{$k}\t"; + } + print O "$chr\t$start\t$gene\t$b1\t$b2\t$nind\t$outL$score\n"; #if $score>=12 || $DIS==1; + print OV "$chr\t$start\t$pstart\t$b1\t$b2\t$qscore\t$pb2\t$annot;COSO_score=$score\t$gt\t$samples\n"; +} +#open(O,">$file.gc.list.csv"); +#foreach $gene (sort{$a<=>$b} keys %G) +#{ +# $pos=$G{$gene}{"GR"} ? $G{$gene}{"GR"} : 0; +# $neg=$G{$gene}{"LW"} ? $G{$gene}{"LW"} : 0; + #print O "$gene $pos $neg\n"; + ##$Tscore+=$scores{$score} if $score>=$coff; +#} + +#open(D,">$file\_damaged.csv"); +#foreach $gene (sort{$a<=>$b} keys %damaged) +#{ +# $D=$damaged{$gene}{"D"} ? $damaged{$gene}{"D"} : 0; +# $T=$damaged{$gene}{"tot"}; + #print D "$gene $D $T\n"; +#} + +#print "$Tscore\n";
