|
0
|
1 #!/usr/bin/perl
|
|
|
2
|
|
|
3 #------------------------------------------------------------------------------
|
|
|
4 #
|
|
|
5 # Mark snps as X1, X2 or 'Both'
|
|
|
6 #
|
|
|
7 #------------------------------------------------------------------------------
|
|
|
8
|
|
|
9 use strict;
|
|
|
10
|
|
|
11 my($debug) = 0;
|
|
|
12
|
|
|
13 #------------------------------------------------------------------------------
|
|
|
14 # Read a file and index lines by SNP
|
|
|
15 #------------------------------------------------------------------------------
|
|
|
16 sub readSnps($) {
|
|
|
17 my($file) = (@_);
|
|
|
18 my($l, %snps);
|
|
|
19
|
|
|
20 open SNP, $file || die "Cannot open file '$file'\n";
|
|
|
21 while( $l = <SNP> ) {
|
|
|
22 my($chr, $pos, $ref, $var) = split /\t/, $l;
|
|
|
23 my($snp) = "$chr:$pos\_$ref/$var";
|
|
|
24 $snps{$snp} .= $l;
|
|
|
25 }
|
|
|
26 close SNP;
|
|
|
27 return %snps;
|
|
|
28 }
|
|
|
29
|
|
|
30 #------------------------------------------------------------------------------
|
|
|
31 # Print SNP info and quals
|
|
|
32 #------------------------------------------------------------------------------
|
|
|
33 sub printLine($$$$) {
|
|
|
34 my($snp, $lines, $quals, $q) = (@_);
|
|
|
35 my($line, @lines);
|
|
|
36 (@lines) = split '\n', $lines;
|
|
|
37 foreach $line ( @lines ) {
|
|
|
38 my($l) = replaceSnpQ($line, $q);
|
|
|
39 print "$l\t$quals\n";
|
|
|
40 }
|
|
|
41 }
|
|
|
42
|
|
|
43 #------------------------------------------------------------------------------
|
|
|
44 # Parse snp quality parameter
|
|
|
45 #------------------------------------------------------------------------------
|
|
|
46 sub parseSnpQ($) {
|
|
|
47 my($l) = @_;
|
|
|
48 my(@t);
|
|
|
49 (@t) = split /\t/,$l;
|
|
|
50 return $t[6];
|
|
|
51 }
|
|
|
52
|
|
|
53 #------------------------------------------------------------------------------
|
|
|
54 # Replace a quality
|
|
|
55 #------------------------------------------------------------------------------
|
|
|
56 sub replaceSnpQ($$) {
|
|
|
57 my($line, $q) = @_;
|
|
|
58 my(@t);
|
|
|
59 (@t) = split /\t/, $line;
|
|
|
60 $t[1] = $q;
|
|
|
61 return join("\t", @t);
|
|
|
62 }
|
|
|
63
|
|
|
64 #------------------------------------------------------------------------------
|
|
|
65 # Main
|
|
|
66 #------------------------------------------------------------------------------
|
|
|
67 # Read arguments
|
|
|
68 my(@file);
|
|
|
69 (@file) = @ARGV;
|
|
|
70 if( $#file <= 0 ) { die "Usage: ./joinSnpEff.pl tag1 file1 tag2 file2 ... tagN fileN\n"; }
|
|
|
71
|
|
|
72 # Parse arguments
|
|
|
73 print STDERR "Reading files:\n";
|
|
|
74 my($i, $j, $file, $tag, $snp, @snpsAll, %snps, @tags);
|
|
|
75 for( $i=0 , $j=0 ; $i < $#ARGV ; $i+=2, $j++ ) {
|
|
|
76 # Read tag
|
|
|
77 $tags[$j] = $tag = $ARGV[$i];
|
|
|
78
|
|
|
79 # Read file
|
|
|
80 $file = $ARGV[$i+1];
|
|
|
81 if( $file eq '' ) { die "Missing file for tag '$tag'\n"; }
|
|
|
82 print STDERR "\tTags[$j]: $tag\t'$file'\n";
|
|
|
83 %snps = readSnps($file);
|
|
|
84
|
|
|
85 # Add all snps
|
|
|
86 foreach $snp ( keys %snps ) { $snpsAll[$j]->{$snp} = $snps{$snp}; }
|
|
|
87 }
|
|
|
88
|
|
|
89 #---
|
|
|
90 # Print SNPS
|
|
|
91 #---
|
|
|
92 my($snp, %done, %snpsi);
|
|
|
93 my($j, $jj);
|
|
|
94 $i = 0;
|
|
|
95 print STDERR "Joining SNP from all files\n";
|
|
|
96 for( $i=0 ; $i <= $#tags ; $i++ ) { # For all tags
|
|
|
97 print "TAG:\ttags[$i] = '$tags[$i]'\n" if $debug;
|
|
|
98 my($uniq, $shared) = (0, 0);
|
|
|
99 %snpsi = %{$snpsAll[$i]};
|
|
|
100 foreach $snp (sort keys %snpsi) { # For all snps...
|
|
|
101 if( ! $done{$snp} ) { # Not done yet?
|
|
|
102
|
|
|
103 # Get qualities from all SNPs
|
|
|
104 my($quals, $qSum, $qCount) = ("", 0, 0);
|
|
|
105 my($all) = "ALL ";
|
|
|
106 for( $j=0, $jj=1 ; $j <= $#snpsAll ; $j++ , $jj++ ) {
|
|
|
107 if( exists $snpsAll[$j]{$snp} ) {
|
|
|
108 my($q) = parseSnpQ($snpsAll[$j]->{$snp});
|
|
|
109 $quals .= "$tags[$j]:$q ";
|
|
|
110 $qSum += $q;
|
|
|
111 $qCount++;
|
|
|
112 } else { $all = ""; }
|
|
|
113 }
|
|
|
114
|
|
|
115 if( $qCount <= 1 ) { $uniq++; } # Count unique SNPs for this file
|
|
|
116 else { $shared++; }
|
|
|
117
|
|
|
118 $done{$snp} = 1;
|
|
|
119 my($qAvg) = ( $qCount > 0 ? int($qSum/$qCount) : 0);
|
|
|
120 printLine($snp, $snpsi{$snp}, "$all $quals", $qAvg);
|
|
|
121 } else { $shared++; }
|
|
|
122 }
|
|
|
123 print STDERR "\tTags[$i]: $tags[$i]\tUnique / Shared snps: $uniq / $shared\n";
|
|
|
124 }
|
|
|
125
|