Mercurial > repos > greg > snpeff_v2_from_pablo
comparison snpEff_2_1a/scripts/joinSnpEff.pl @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
| author | greg |
|---|---|
| date | Fri, 20 Apr 2012 14:47:09 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f8eaa3f8194b |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 #------------------------------------------------------------------------------ | |
| 4 # | |
| 5 # Mark snps as X1, X2 or 'Both' | |
| 6 # | |
| 7 #------------------------------------------------------------------------------ | |
| 8 | |
| 9 use strict; | |
| 10 | |
| 11 my($debug) = 0; | |
| 12 | |
| 13 #------------------------------------------------------------------------------ | |
| 14 # Read a file and index lines by SNP | |
| 15 #------------------------------------------------------------------------------ | |
| 16 sub readSnps($) { | |
| 17 my($file) = (@_); | |
| 18 my($l, %snps); | |
| 19 | |
| 20 open SNP, $file || die "Cannot open file '$file'\n"; | |
| 21 while( $l = <SNP> ) { | |
| 22 my($chr, $pos, $ref, $var) = split /\t/, $l; | |
| 23 my($snp) = "$chr:$pos\_$ref/$var"; | |
| 24 $snps{$snp} .= $l; | |
| 25 } | |
| 26 close SNP; | |
| 27 return %snps; | |
| 28 } | |
| 29 | |
| 30 #------------------------------------------------------------------------------ | |
| 31 # Print SNP info and quals | |
| 32 #------------------------------------------------------------------------------ | |
| 33 sub printLine($$$$) { | |
| 34 my($snp, $lines, $quals, $q) = (@_); | |
| 35 my($line, @lines); | |
| 36 (@lines) = split '\n', $lines; | |
| 37 foreach $line ( @lines ) { | |
| 38 my($l) = replaceSnpQ($line, $q); | |
| 39 print "$l\t$quals\n"; | |
| 40 } | |
| 41 } | |
| 42 | |
| 43 #------------------------------------------------------------------------------ | |
| 44 # Parse snp quality parameter | |
| 45 #------------------------------------------------------------------------------ | |
| 46 sub parseSnpQ($) { | |
| 47 my($l) = @_; | |
| 48 my(@t); | |
| 49 (@t) = split /\t/,$l; | |
| 50 return $t[6]; | |
| 51 } | |
| 52 | |
| 53 #------------------------------------------------------------------------------ | |
| 54 # Replace a quality | |
| 55 #------------------------------------------------------------------------------ | |
| 56 sub replaceSnpQ($$) { | |
| 57 my($line, $q) = @_; | |
| 58 my(@t); | |
| 59 (@t) = split /\t/, $line; | |
| 60 $t[1] = $q; | |
| 61 return join("\t", @t); | |
| 62 } | |
| 63 | |
| 64 #------------------------------------------------------------------------------ | |
| 65 # Main | |
| 66 #------------------------------------------------------------------------------ | |
| 67 # Read arguments | |
| 68 my(@file); | |
| 69 (@file) = @ARGV; | |
| 70 if( $#file <= 0 ) { die "Usage: ./joinSnpEff.pl tag1 file1 tag2 file2 ... tagN fileN\n"; } | |
| 71 | |
| 72 # Parse arguments | |
| 73 print STDERR "Reading files:\n"; | |
| 74 my($i, $j, $file, $tag, $snp, @snpsAll, %snps, @tags); | |
| 75 for( $i=0 , $j=0 ; $i < $#ARGV ; $i+=2, $j++ ) { | |
| 76 # Read tag | |
| 77 $tags[$j] = $tag = $ARGV[$i]; | |
| 78 | |
| 79 # Read file | |
| 80 $file = $ARGV[$i+1]; | |
| 81 if( $file eq '' ) { die "Missing file for tag '$tag'\n"; } | |
| 82 print STDERR "\tTags[$j]: $tag\t'$file'\n"; | |
| 83 %snps = readSnps($file); | |
| 84 | |
| 85 # Add all snps | |
| 86 foreach $snp ( keys %snps ) { $snpsAll[$j]->{$snp} = $snps{$snp}; } | |
| 87 } | |
| 88 | |
| 89 #--- | |
| 90 # Print SNPS | |
| 91 #--- | |
| 92 my($snp, %done, %snpsi); | |
| 93 my($j, $jj); | |
| 94 $i = 0; | |
| 95 print STDERR "Joining SNP from all files\n"; | |
| 96 for( $i=0 ; $i <= $#tags ; $i++ ) { # For all tags | |
| 97 print "TAG:\ttags[$i] = '$tags[$i]'\n" if $debug; | |
| 98 my($uniq, $shared) = (0, 0); | |
| 99 %snpsi = %{$snpsAll[$i]}; | |
| 100 foreach $snp (sort keys %snpsi) { # For all snps... | |
| 101 if( ! $done{$snp} ) { # Not done yet? | |
| 102 | |
| 103 # Get qualities from all SNPs | |
| 104 my($quals, $qSum, $qCount) = ("", 0, 0); | |
| 105 my($all) = "ALL "; | |
| 106 for( $j=0, $jj=1 ; $j <= $#snpsAll ; $j++ , $jj++ ) { | |
| 107 if( exists $snpsAll[$j]{$snp} ) { | |
| 108 my($q) = parseSnpQ($snpsAll[$j]->{$snp}); | |
| 109 $quals .= "$tags[$j]:$q "; | |
| 110 $qSum += $q; | |
| 111 $qCount++; | |
| 112 } else { $all = ""; } | |
| 113 } | |
| 114 | |
| 115 if( $qCount <= 1 ) { $uniq++; } # Count unique SNPs for this file | |
| 116 else { $shared++; } | |
| 117 | |
| 118 $done{$snp} = 1; | |
| 119 my($qAvg) = ( $qCount > 0 ? int($qSum/$qCount) : 0); | |
| 120 printLine($snp, $snpsi{$snp}, "$all $quals", $qAvg); | |
| 121 } else { $shared++; } | |
| 122 } | |
| 123 print STDERR "\tTags[$i]: $tags[$i]\tUnique / Shared snps: $uniq / $shared\n"; | |
| 124 } | |
| 125 |
