annotate snpEff_2_1a/scripts/joinSnpEff.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
1 #!/usr/bin/perl
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
2
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
3 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
4 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
5 # Mark snps as X1, X2 or 'Both'
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
6 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
7 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
8
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
9 use strict;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
10
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
11 my($debug) = 0;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
12
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
13 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
14 # Read a file and index lines by SNP
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
15 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
16 sub readSnps($) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
17 my($file) = (@_);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
18 my($l, %snps);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
19
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
20 open SNP, $file || die "Cannot open file '$file'\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
21 while( $l = <SNP> ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
22 my($chr, $pos, $ref, $var) = split /\t/, $l;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
23 my($snp) = "$chr:$pos\_$ref/$var";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
24 $snps{$snp} .= $l;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
25 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
26 close SNP;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
27 return %snps;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
28 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
29
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
30 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
31 # Print SNP info and quals
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
32 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
33 sub printLine($$$$) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
34 my($snp, $lines, $quals, $q) = (@_);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
35 my($line, @lines);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
36 (@lines) = split '\n', $lines;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
37 foreach $line ( @lines ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
38 my($l) = replaceSnpQ($line, $q);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
39 print "$l\t$quals\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
40 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
41 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
42
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
43 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
44 # Parse snp quality parameter
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
45 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
46 sub parseSnpQ($) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
47 my($l) = @_;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
48 my(@t);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
49 (@t) = split /\t/,$l;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
50 return $t[6];
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
51 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
52
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
53 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
54 # Replace a quality
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
55 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
56 sub replaceSnpQ($$) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
57 my($line, $q) = @_;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
58 my(@t);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
59 (@t) = split /\t/, $line;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
60 $t[1] = $q;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
61 return join("\t", @t);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
62 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
63
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
64 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
65 # Main
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
66 #------------------------------------------------------------------------------
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
67 # Read arguments
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
68 my(@file);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
69 (@file) = @ARGV;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
70 if( $#file <= 0 ) { die "Usage: ./joinSnpEff.pl tag1 file1 tag2 file2 ... tagN fileN\n"; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
71
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
72 # Parse arguments
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
73 print STDERR "Reading files:\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
74 my($i, $j, $file, $tag, $snp, @snpsAll, %snps, @tags);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
75 for( $i=0 , $j=0 ; $i < $#ARGV ; $i+=2, $j++ ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
76 # Read tag
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
77 $tags[$j] = $tag = $ARGV[$i];
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
78
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
79 # Read file
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
80 $file = $ARGV[$i+1];
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
81 if( $file eq '' ) { die "Missing file for tag '$tag'\n"; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
82 print STDERR "\tTags[$j]: $tag\t'$file'\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
83 %snps = readSnps($file);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
84
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
85 # Add all snps
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
86 foreach $snp ( keys %snps ) { $snpsAll[$j]->{$snp} = $snps{$snp}; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
87 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
88
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
89 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
90 # Print SNPS
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
91 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
92 my($snp, %done, %snpsi);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
93 my($j, $jj);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
94 $i = 0;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
95 print STDERR "Joining SNP from all files\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
96 for( $i=0 ; $i <= $#tags ; $i++ ) { # For all tags
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
97 print "TAG:\ttags[$i] = '$tags[$i]'\n" if $debug;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
98 my($uniq, $shared) = (0, 0);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
99 %snpsi = %{$snpsAll[$i]};
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
100 foreach $snp (sort keys %snpsi) { # For all snps...
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
101 if( ! $done{$snp} ) { # Not done yet?
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
102
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
103 # Get qualities from all SNPs
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
104 my($quals, $qSum, $qCount) = ("", 0, 0);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
105 my($all) = "ALL ";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
106 for( $j=0, $jj=1 ; $j <= $#snpsAll ; $j++ , $jj++ ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
107 if( exists $snpsAll[$j]{$snp} ) {
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
108 my($q) = parseSnpQ($snpsAll[$j]->{$snp});
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
109 $quals .= "$tags[$j]:$q ";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
110 $qSum += $q;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
111 $qCount++;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
112 } else { $all = ""; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
113 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
114
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
115 if( $qCount <= 1 ) { $uniq++; } # Count unique SNPs for this file
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
116 else { $shared++; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
117
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
118 $done{$snp} = 1;
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
119 my($qAvg) = ( $qCount > 0 ? int($qSum/$qCount) : 0);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
120 printLine($snp, $snpsi{$snp}, "$all $quals", $qAvg);
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
121 } else { $shared++; }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
122 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
123 print STDERR "\tTags[$i]: $tags[$i]\tUnique / Shared snps: $uniq / $shared\n";
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
124 }
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
125