Mercurial > repos > iuc > khmer
comparison filter-below-abund.py @ 0:0187f18785a3 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 37727831a2630b7a7d4fb033366cbd772c3086c8
| author | iuc |
|---|---|
| date | Sat, 17 Oct 2015 04:02:33 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0187f18785a3 |
|---|---|
| 1 #! /usr/bin/env python | |
| 2 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is | |
| 3 # Copyright (C) 2011-2015, Michigan State University. | |
| 4 # Copyright (C) 2015, The Regents of the University of California. | |
| 5 # | |
| 6 # Redistribution and use in source and binary forms, with or without | |
| 7 # modification, are permitted provided that the following conditions are | |
| 8 # met: | |
| 9 # | |
| 10 # * Redistributions of source code must retain the above copyright | |
| 11 # notice, this list of conditions and the following disclaimer. | |
| 12 # | |
| 13 # * Redistributions in binary form must reproduce the above | |
| 14 # copyright notice, this list of conditions and the following | |
| 15 # disclaimer in the documentation and/or other materials provided | |
| 16 # with the distribution. | |
| 17 # | |
| 18 # * Neither the name of the Michigan State University nor the names | |
| 19 # of its contributors may be used to endorse or promote products | |
| 20 # derived from this software without specific prior written | |
| 21 # permission. | |
| 22 # | |
| 23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 27 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 34 # | |
| 35 # Contact: khmer-project@idyll.org | |
| 36 from __future__ import print_function | |
| 37 import sys | |
| 38 import os | |
| 39 import khmer | |
| 40 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter | |
| 41 | |
| 42 WORKER_THREADS = 8 | |
| 43 GROUPSIZE = 100 | |
| 44 | |
| 45 CUTOFF = 50 | |
| 46 | |
| 47 ### | |
| 48 | |
| 49 | |
def main():
    """Trim every input sequence file against a saved khmer countgraph.

    Command line::

        filter-below-abund.py <countgraph file> <infile> [<infile> ...]

    ``sys.argv[1]`` is loaded with ``khmer.load_countgraph``; each remaining
    argument is read with ``verbose_fasta_iter`` and filtered by
    ``process_fn`` on ``WORKER_THREADS`` threads.  Results for an input are
    written to ``<basename of input>.below`` in the current working
    directory.

    Raises:
        IndexError: if no countgraph file is given on the command line.
    """
    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_countgraph(counting_ht)
    K = ht.ksize()

    # The per-record filter depends only on the countgraph and its k-mer
    # size, so it is defined once rather than once per input file.
    def process_fn(record, ht=ht):
        """Return ``(name, trimmed_sequence)`` to keep, or ``(None, None)``."""
        name = record['name']
        seq = record['sequence']
        # Reads containing an 'N' are rejected outright.
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        # Keep the read only if at least one full k-mer remains after
        # trimming.
        if trim_at >= K:
            return name, trim_seq

        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # Close (and flush) each output file as soon as its input has been
        # processed instead of leaking one handle per input file.
        # NOTE(review): relies on tsp.start() returning only after its
        # writer has finished -- confirm against khmer.thread_utils.
        with open(outfile, 'w') as outfp:
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)
| 85 | |
| 86 if __name__ == '__main__': | |
| 87 main() |
