#!/usr/bin/perl
#
# Build a presence/absence matrix of base substitutions across genomes.
#
# Usage:  script OUTPUT_FILE [INPUT_FILE ...]
#
# Each input line is split on whitespace; field 1 is read as a position,
# fields 2 and 3 as the two bases at that position (called ref/alt below),
# and the last field as the genome label.  Lines whose two base fields do
# not both contain one of A, C, G, T are ignored.  The genome label of a
# file is taken from its first accepted line.
#
# Within one file, accepted positions that directly follow each other
# (distance <= 1) are merged into a single multi-base event.  Every event
# is keyed as "<start>_<ref>|<alt>".
#
# Output: a header line (a leading space, then the genome labels separated
# by spaces), followed by one line per event: the event key and a 1/0 flag
# per genome, all separated by single spaces.

use strict;
use warnings;

my $ofile = shift @ARGV;
die "Usage: $0 OUTPUT_FILE [INPUT_FILE ...]\n"
    unless defined $ofile && length $ofile;

# Open the output first so an unwritable destination is reported before
# any input is processed.
open my $out, '>', $ofile
    or die "Cannot open '$ofile' for writing: $!\n";

my @genomes;      # genome labels, in the order their files were read
my %dat_final;    # event key => { genome label => 1 }

FILE:
for my $file (@ARGV) {
    my $in;
    unless (open $in, '<', $file) {
        # The unreadable file contributes nothing to the matrix, but the
        # failure is reported instead of being silently ignored.
        warn "Cannot open '$file' for reading: $!; skipping\n";
        next FILE;
    }

    my ($genome, $bases_at) = read_calls($in);
    close $in or warn "Error while closing '$file': $!\n";

    # A file without any accepted line has no label and no events.
    next FILE unless defined $genome;

    push @genomes, $genome;
    $dat_final{$_}{$genome} = 1 for collapse_runs($bases_at);
}

print {$out} ' ', join(' ', @genomes), "\n"
    or die "Cannot write to '$ofile': $!\n";

# Order events by start position; the string comparison breaks ties so the
# output does not depend on hash ordering.
my @ordered_keys =
    sort { variant_start($a) <=> variant_start($b) or $a cmp $b }
    keys %dat_final;

for my $key (@ordered_keys) {
    my @flags = map { $dat_final{$key}{$_} ? 1 : 0 } @genomes;
    print {$out} join(' ', $key, @flags), "\n"
        or die "Cannot write to '$ofile': $!\n";
}

# Buffered write errors only surface on close, so check it.
close $out or die "Error while closing '$ofile': $!\n";

# Read one input handle and return (genome label, { position => [ref, alt] }).
# The label is undef when no line was accepted.
sub read_calls {
    my ($fh) = @_;

    my $genome;
    my %bases_at;

    while (my $line = <$fh>) {
        chomp $line;
        my ($pos, $ref, $alt, $label) = (split /\s+/, $line)[1, 2, 3, -1];

        # Short lines leave the base fields undefined.
        next unless defined $ref && defined $alt;

        # NOTE(review): this match is unanchored, so any field merely
        # containing A/C/G/T passes -- kept exactly as in the original.
        next unless $ref =~ /[ACTG]/ && $alt =~ /[ACTG]/;

        # A repeated position overwrites the earlier entry.
        $bases_at{$pos} = [$ref, $alt];
        $genome = $label unless defined $genome;
    }

    return ($genome, \%bases_at);
}

# Merge directly adjacent positions into runs and return one event key
# ("<start>_<ref>|<alt>") per run, in ascending position order.
sub collapse_runs {
    my ($bases_at) = @_;

    my @events;
    my ($start, $prev_pos, $ref, $alt);    # $start is undef while no run is open

    for my $pos (sort { $a <=> $b } keys %{$bases_at}) {
        my ($base_ref, $base_alt) = @{ $bases_at->{$pos} };

        if (defined $start && $pos - $prev_pos <= 1) {
            # Adjacent to the previous position: extend the open run.
            $ref .= $base_ref;
            $alt .= $base_alt;
        }
        else {
            # Gap (or very first position): flush the open run, start anew.
            # Remembering the start explicitly avoids deriving it from the
            # allele length and keeps a run beginning at position <= 1 from
            # being glued onto a placeholder value.
            push @events, sprintf('%s_%s|%s', $start, $ref, $alt)
                if defined $start;
            ($start, $ref, $alt) = ($pos + 0, $base_ref, $base_alt);
        }

        $prev_pos = $pos;
    }

    push @events, sprintf('%s_%s|%s', $start, $ref, $alt) if defined $start;

    return @events;
}

# Return the numeric start position encoded at the front of an event key.
sub variant_start {
    my ($key) = @_;
    return (split /_/, $key, 2)[0];
}