Mercurial > repos > rnateam > bctools
comparison rm_spurious_events.pl @ 50:0b9aab6aaebf draft
Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
author | rnateam |
---|---|
date | Tue, 26 Jan 2016 04:38:27 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
49:303f6402a035 | 50:0b9aab6aaebf |
---|---|
1 #!/usr/local/perl/bin/perl | |
2 use feature ':5.10'; | |
3 use strict 'vars'; | |
4 use warnings; | |
5 use Getopt::Long; | |
6 use Pod::Usage; | |
7 use List::Util qw/ min max /; | |
8 use POSIX qw/ceil floor/; | |
9 use File::Temp qw(tempdir); | |
10 use File::Basename; | |
11 | |
12 =head1 NAME | |
13 | |
14 filterSquashedReads.pl --frac_max FLOAT | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 this script reads sites after pcr duplicate removal. | |
19 for all crosslinking sites sharing start and stop coordinates, the maximum number | |
20 of squashed reads is determined. | |
21 alignments having less than frac_max * max reads are discarded | |
22 | |
23 assumes bed entries to be sorted chr,strand,start,stop,score with score descending | |
24 | |
25 Options: | |
26 | |
27 --frac_max filter out alignments supported by less reads than this fraction of the maximum number of reads per position | |
28 -debug enable debug output | |
29 -help brief help message | |
30 -man full documentation | |
31 | |
32 =head1 DESCRIPTION | |
33 | |
34 =cut | |
35 | |
36 ############################################################################### | |
37 # parse command line options | |
38 ############################################################################### | |
39 my $frac_max = 0.1; | |
40 my $help; | |
41 my $man; | |
42 my $debug; | |
43 my $result = GetOptions ( "frac_max=f" => \$frac_max, | |
44 "help" => \$help, | |
45 "man" => \$man, | |
46 "debug" => \$debug); | |
47 pod2usage(-exitstatus => 1, -verbose => 1) if $help; | |
48 pod2usage(-exitstatus => 0, -verbose => 2) if $man; | |
49 ($result) or pod2usage(2); | |
50 | |
51 ############################################################################### | |
52 # main | |
53 ############################################################################### | |
54 my $current_chr = ''; | |
55 my $current_start = -1; | |
56 my $current_stop = -1; | |
57 my $current_strand = ''; | |
58 my $current_max = -1; | |
59 my $current_threshold = -1; | |
60 | |
61 while (<>) { | |
62 my ($chr, $start, $stop, $id, $count, $strand) = split("\t"); | |
63 # my ($count, undef, $start, $chr, $strand, $stop) = split("\t"); | |
64 | |
65 if ($current_start != $start or | |
66 $current_stop != $stop or | |
67 $current_chr ne $chr or | |
68 $current_strand ne $strand) { | |
69 # if this is the first occourence of these coordinates | |
70 # this must be the new maximum | |
71 # save new state | |
72 $current_start = $start; | |
73 $current_stop = $stop; | |
74 $current_chr = $chr; | |
75 $current_strand = $strand; | |
76 # print record and record maximum | |
77 $current_max = $count; | |
78 $current_threshold = $count*$frac_max; | |
79 print $_; | |
80 $debug and say "new threshold ${current_threshold} @ $chr $start $stop $strand $count"; | |
81 } elsif ($count >= $current_threshold) { | |
82 # if it is not the first occourence, evaluate threshold and print if valid | |
83 print $_; | |
84 } else { | |
85 $debug and say "below threshold @ $chr $start $stop $strand $count"; | |
86 } | |
87 } |