comparison snpEff_2_1a/scripts/joinSnpEff.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f8eaa3f8194b
1 #!/usr/bin/perl
2
3 #------------------------------------------------------------------------------
4 #
5 # Mark snps as X1, X2 or 'Both'
6 #
7 #------------------------------------------------------------------------------
8
9 use strict;
10
11 my($debug) = 0;
12
13 #------------------------------------------------------------------------------
14 # Read a file and index lines by SNP
15 #------------------------------------------------------------------------------
16 sub readSnps($) {
17 my($file) = (@_);
18 my($l, %snps);
19
20 open SNP, $file || die "Cannot open file '$file'\n";
21 while( $l = <SNP> ) {
22 my($chr, $pos, $ref, $var) = split /\t/, $l;
23 my($snp) = "$chr:$pos\_$ref/$var";
24 $snps{$snp} .= $l;
25 }
26 close SNP;
27 return %snps;
28 }
29
30 #------------------------------------------------------------------------------
31 # Print SNP info and quals
32 #------------------------------------------------------------------------------
33 sub printLine($$$$) {
34 my($snp, $lines, $quals, $q) = (@_);
35 my($line, @lines);
36 (@lines) = split '\n', $lines;
37 foreach $line ( @lines ) {
38 my($l) = replaceSnpQ($line, $q);
39 print "$l\t$quals\n";
40 }
41 }
42
43 #------------------------------------------------------------------------------
44 # Parse snp quality parameter
45 #------------------------------------------------------------------------------
46 sub parseSnpQ($) {
47 my($l) = @_;
48 my(@t);
49 (@t) = split /\t/,$l;
50 return $t[6];
51 }
52
53 #------------------------------------------------------------------------------
54 # Replace a quality
55 #------------------------------------------------------------------------------
56 sub replaceSnpQ($$) {
57 my($line, $q) = @_;
58 my(@t);
59 (@t) = split /\t/, $line;
60 $t[1] = $q;
61 return join("\t", @t);
62 }
63
64 #------------------------------------------------------------------------------
65 # Main
66 #------------------------------------------------------------------------------
67 # Read arguments
68 my(@file);
69 (@file) = @ARGV;
70 if( $#file <= 0 ) { die "Usage: ./joinSnpEff.pl tag1 file1 tag2 file2 ... tagN fileN\n"; }
71
72 # Parse arguments
73 print STDERR "Reading files:\n";
74 my($i, $j, $file, $tag, $snp, @snpsAll, %snps, @tags);
75 for( $i=0 , $j=0 ; $i < $#ARGV ; $i+=2, $j++ ) {
76 # Read tag
77 $tags[$j] = $tag = $ARGV[$i];
78
79 # Read file
80 $file = $ARGV[$i+1];
81 if( $file eq '' ) { die "Missing file for tag '$tag'\n"; }
82 print STDERR "\tTags[$j]: $tag\t'$file'\n";
83 %snps = readSnps($file);
84
85 # Add all snps
86 foreach $snp ( keys %snps ) { $snpsAll[$j]->{$snp} = $snps{$snp}; }
87 }
88
89 #---
90 # Print SNPS
91 #---
92 my($snp, %done, %snpsi);
93 my($j, $jj);
94 $i = 0;
95 print STDERR "Joining SNP from all files\n";
96 for( $i=0 ; $i <= $#tags ; $i++ ) { # For all tags
97 print "TAG:\ttags[$i] = '$tags[$i]'\n" if $debug;
98 my($uniq, $shared) = (0, 0);
99 %snpsi = %{$snpsAll[$i]};
100 foreach $snp (sort keys %snpsi) { # For all snps...
101 if( ! $done{$snp} ) { # Not done yet?
102
103 # Get qualities from all SNPs
104 my($quals, $qSum, $qCount) = ("", 0, 0);
105 my($all) = "ALL ";
106 for( $j=0, $jj=1 ; $j <= $#snpsAll ; $j++ , $jj++ ) {
107 if( exists $snpsAll[$j]{$snp} ) {
108 my($q) = parseSnpQ($snpsAll[$j]->{$snp});
109 $quals .= "$tags[$j]:$q ";
110 $qSum += $q;
111 $qCount++;
112 } else { $all = ""; }
113 }
114
115 if( $qCount <= 1 ) { $uniq++; } # Count unique SNPs for this file
116 else { $shared++; }
117
118 $done{$snp} = 1;
119 my($qAvg) = ( $qCount > 0 ? int($qSum/$qCount) : 0);
120 printLine($snp, $snpsi{$snp}, "$all $quals", $qAvg);
121 } else { $shared++; }
122 }
123 print STDERR "\tTags[$i]: $tags[$i]\tUnique / Shared snps: $uniq / $shared\n";
124 }
125