Mercurial > repos > elixir-it > corgat_funct_annot
comparison FunAnn/annotate.pl @ 1:40b81942976a draft default tip
Uploaded
| author | elixir-it |
|---|---|
| date | Tue, 16 Feb 2021 09:14:20 +0000 |
| parents | 7e7168ebc150 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:7e7168ebc150 | 1:40b81942976a |
|---|---|
| 1 $fss=13468; | 1 $fss=13468; |
| 2 | 2 |
| 3 unless (-e "CorGAT") | 3 unless (-e "GCF_009858895.2_ASM985889v3_genomic.fna") |
| 4 | 4 { |
| 5 { | 5 |
| 6 system("wget -i https://raw.githubusercontent.com/matteo14c/CorGAT_galaxy/dev/ann.txt"); | 6 system("wget -i https://raw.githubusercontent.com/matteo14c/CorGAT_galaxy/master/ann.txt"); |
| 7 system("gzip -d GCF_009858895.2_ASM985889v3_genomic.fna.gz"); | 7 system("gzip -d GCF_009858895.2_ASM985889v3_genomic.fna.gz"); |
| 8 } | 8 } |
| 9 | 9 |
| 10 $gen_code="genetic_code"; | 10 $gen_code="genetic_code"; |
| 11 die("need genetic code file in the current folder\n") unless -e "genetic_code"; | 11 die("need genetic code file in the current folder\n") unless -e "genetic_code"; |
| 12 open(IN,$gen_code); | 12 open(IN,$gen_code); |
| 14 { | 14 { |
| 15 ($triplet,$oneL)=(split()); | 15 ($triplet,$oneL)=(split()); |
| 16 $code{$triplet}=$oneL; | 16 $code{$triplet}=$oneL; |
| 17 } | 17 } |
| 18 | 18 |
| 19 $genome="GCF_009858895.2_ASM985889v3_genomic.fna"; | 19 $genome="GCF_009858895.2_ASM985889v3_genomic.fna "; |
| 20 die("need reference genome file in the current folder\n") unless -e "GCF_009858895.2_ASM985889v3_genomic.fna"; | 20 die("need reference genome file in the current folder\n") unless -e "GCF_009858895.2_ASM985889v3_genomic.fna"; |
| 21 open(IN,$genome); | 21 open(IN,$genome); |
| 22 while(<IN>) | 22 while(<IN>) |
| 23 { | 23 { |
| 24 next if $_=~/^>/; | 24 next if $_=~/^>/; |
| 56 { | 56 { |
| 57 $pos=$i+1; | 57 $pos=$i+1; |
| 58 $res=$seq_res[$i]; | 58 $res=$seq_res[$i]; |
| 59 } | 59 } |
| 60 } | 60 } |
| 61 %AF_data=%{read_simple_table("af_data_new.csv")}; | 61 %AF_data=%{read_simple_table("AF_current.csv")}; |
| 62 %MFE_data=%{read_simple_table("MFE_annot.csv")}; | 62 %MFE_data=%{read_simple_table("MFE_annot.csv")}; |
| 63 %epi_data=%{read_epitopes("epitopes_annot.csv")}; | 63 %epi_data=%{read_epitopes("epitopes_annot.csv")}; |
| 64 %hyphy_data=%{read_hyphy("hyphy_novel.csv")}; | 64 %hyphy_data=%{read_hyphy("hyphy_current.csv")}; |
| 65 | 65 |
| 66 | 66 |
| 67 | 67 |
| 68 $var_File=shift;#""cl7.csv";#"phenetic_indels_sars_cov2.csv"; | 68 $var_File=shift;#""cl7.csv";#"phenetic_indels_sars_cov2.csv"; |
| 69 $out_File=shift; | 69 $out_File=shift; |
| 170 die("2\n $triplet b:$Bs[1] r:$ref")unless ($Bs[1] eq $ref); | 170 die("2\n $triplet b:$Bs[1] r:$ref")unless ($Bs[1] eq $ref); |
| 171 $Bs[1]=$alt; | 171 $Bs[1]=$alt; |
| 172 }elsif ($mod==0){ | 172 }elsif ($mod==0){ |
| 173 $triplet=substr($seq,$pos-3,3); | 173 $triplet=substr($seq,$pos-3,3); |
| 174 @Bs=split('',$triplet); | 174 @Bs=split('',$triplet); |
| 175 die("3\n $triplet b:$Bs[2] r:$ref")unless ($Bs[2] eq $ref); | 175 die("$_ 3\n $triplet b:$Bs[2] r:$ref")unless ($Bs[2] eq $ref); |
| 176 $Bs[2]=$alt; | 176 $Bs[2]=$alt; |
| 177 } | 177 } |
| 178 #print "$pos_inG $relpos $mod @Bs\n"; | 178 #print "$pos_inG $relpos $mod @Bs\n"; |
| 179 $Atriplet=join("",@Bs); | 179 $Atriplet=join("",@Bs); |
| 180 $A1=$code{$triplet}; | 180 $A1=$code{$triplet}; |
| 278 $eff="inframe"; | 278 $eff="inframe"; |
| 279 if ($ref=~/\./) | 279 if ($ref=~/\./) |
| 280 { | 280 { |
| 281 $eff.="Ins"; | 281 $eff.="Ins"; |
| 282 $Talt=(translate($Calt,\%code))[1]; | 282 $Talt=(translate($Calt,\%code))[1]; |
| 283 if ($Cref=~/[ACTG]{1,}/) | |
| 284 { | |
| 285 $Cref=~s/\.//g; | |
| 286 $Tref=(translate($Cref,\%code))[1]; | |
| 287 } | |
| 288 | |
| 283 } | 289 } |
| 284 if ($alt=~/\./) | 290 if ($alt=~/\./) |
| 285 { | 291 { |
| 286 $eff.="Del"; | 292 $eff.="Del"; |
| 287 $Tref=(translate($Cref,\%code))[1]; | 293 $Tref=(translate($Cref,\%code))[1]; |
| 294 if ($Calt=~/[ACTG]{1,}/) | |
| 295 { | |
| 296 $Calt=~s/\.//g; | |
| 297 $Talt=(translate($Calt,\%code))[1]; | |
| 298 } | |
| 288 } | 299 } |
| 289 if ($eff=~/Del/) | 300 if ($eff=~/Del/) |
| 290 { | 301 { |
| 291 @Tref=split('',$Ttref); | 302 @Tref=split('',$Ttref); |
| 292 for ($i=0;$i<=$#Tref;$i++) | 303 for ($i=0;$i<=$#Tref;$i++) |
| 386 { | 397 { |
| 387 chomp(); | 398 chomp(); |
| 388 ($gene,$pos,@annot)=(split(/\,/)); | 399 ($gene,$pos,@annot)=(split(/\,/)); |
| 389 foreach $a (@annot) | 400 foreach $a (@annot) |
| 390 { | 401 { |
| 402 $a=~s/{//g; | |
| 403 $a=~s/}//g; | |
| 391 ($ref,$key)=(split(/\:/,$a)); | 404 ($ref,$key)=(split(/\:/,$a)); |
| 392 $ref=~s/{//g; | |
| 393 $ref=~s/}//g; | |
| 394 $ref=~s/"//g; | 405 $ref=~s/"//g; |
| 395 $key=~s/"//g; | 406 $key=~s/"//g; |
| 396 #print "$gene $pos $ref $key\n"; | 407 #print "$gene $pos $ref $key\n"; |
| 397 if ($kw{$ref}) | 408 if ($kw{$ref}) |
| 398 { | 409 { |
