Mercurial > repos > mvdbeek > planemo_0_24_2_test
comparison yac.py @ 0:a5fee116b75b draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 70312b58ba246c07e70cdbd0a097f274f1386d09-dirty
| author | mvdbeek |
|---|---|
| date | Mon, 25 Apr 2016 09:20:55 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a5fee116b75b |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # yac = yet another clipper | |
| 3 # v 1.2.1 - 23-08-2014 - Support FastQ output | |
| 4 # v 1.1.0 - 23-08-2014 - argparse implementation | |
| 5 # Usage yac.py $input $output $adapter_to_clip $min $max $Nmode | |
| 6 # Christophe Antoniewski <drosofff@gmail.com> | |
| 7 | |
| 8 import sys | |
| 9 import string | |
| 10 import argparse | |
| 11 from itertools import islice | |
| 12 | |
| 13 | |
| 14 def Parser(): | |
| 15 the_parser = argparse.ArgumentParser() | |
| 16 the_parser.add_argument( | |
| 17 '--input', action="store", nargs='+', help="input fastq files") | |
| 18 the_parser.add_argument( | |
| 19 '--output', action="store", type=str, help="output, clipped fasta file") | |
| 20 the_parser.add_argument( | |
| 21 '--output_format', action="store", type=str, help="output format, fasta or fastq") | |
| 22 the_parser.add_argument( | |
| 23 '--adapter_to_clip', action="store", type=str, help="adapter sequence to clip") | |
| 24 the_parser.add_argument( | |
| 25 '--min', action="store", type=int, help="minimal size of clipped sequence to keep") | |
| 26 the_parser.add_argument( | |
| 27 '--max', action="store", type=int, help="maximal size of clipped sequence to keep") | |
| 28 the_parser.add_argument('--Nmode', action="store", type=str, choices=[ | |
| 29 "accept", "reject"], help="accept or reject sequences with N for clipping") | |
| 30 args = the_parser.parse_args() | |
| 31 args.adapter_to_clip = args.adapter_to_clip.upper() | |
| 32 return args | |
| 33 | |
| 34 | |
| 35 class Clip: | |
| 36 | |
| 37 def __init__(self, inputfile, outputfile, output_format, adapter, minsize, maxsize, Nmode): | |
| 38 self.inputfile = inputfile | |
| 39 self.outputfile = outputfile | |
| 40 self.output_format = output_format | |
| 41 self.adapter = adapter | |
| 42 self.minsize = int(minsize) | |
| 43 self.maxsize = int(maxsize) | |
| 44 self.Nmode = Nmode | |
| 45 | |
| 46 def motives(sequence): | |
| 47 '''return a list of motives for perfect (6nt) or imperfect (7nt with one mismatch) search on import string module''' | |
| 48 sequencevariants = [ | |
| 49 sequence[0:6]] # initializes the list with the 6mer perfect match | |
| 50 dicsubst = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"} | |
| 51 for pos in enumerate(sequence[:6]): | |
| 52 for subst in dicsubst[pos[1]]: | |
| 53 sequencevariants.append( | |
| 54 sequence[:pos[0]] + subst + sequence[pos[0] + 1:7]) | |
| 55 return sequencevariants | |
| 56 self.adaptmotifs = motives(self.adapter) | |
| 57 | |
| 58 def scanadapt(self, adaptmotives=[], sequence="", qscore=""): | |
| 59 '''scans sequence for adapter motives''' | |
| 60 match_position = sequence.rfind(adaptmotives[0]) | |
| 61 if match_position != -1: | |
| 62 return sequence[:match_position], qscore[:match_position] | |
| 63 for motif in adaptmotives[1:]: | |
| 64 match_position = sequence.rfind(motif) | |
| 65 if match_position != -1: | |
| 66 return sequence[:match_position], qscore[:match_position] | |
| 67 return sequence, qscore | |
| 68 | |
| 69 def write_output(self, id, read, qscore, output): | |
| 70 if self.output_format == "fasta": | |
| 71 block = ">{0}\n{1}\n".format(id, read) | |
| 72 else: | |
| 73 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore) | |
| 74 output.write(block) | |
| 75 | |
| 76 def handle_io(self): | |
| 77 '''Open input file, pass read sequence and read qscore to clipping function. | |
| 78 Pass clipped read and qscore to output function.''' | |
| 79 id = 0 | |
| 80 output = open(self.outputfile, "a") | |
| 81 with open(self.inputfile, "r") as input: | |
| 82 block_gen = islice(input, 1, None, 2) | |
| 83 for i, line in enumerate(block_gen): | |
| 84 if i % 2: | |
| 85 qscore = line.rstrip() | |
| 86 else: | |
| 87 read = line.rstrip() | |
| 88 continue | |
| 89 trimmed_read, trimmed_qscore = self.scanadapt( | |
| 90 self.adaptmotifs, read, qscore) | |
| 91 if self.minsize <= len(trimmed_read) <= self.maxsize: | |
| 92 if (self.Nmode == "reject") and ("N" in trimmed_read): | |
| 93 continue | |
| 94 id += 1 | |
| 95 self.write_output(id, trimmed_read, trimmed_qscore, output) | |
| 96 output.close() | |
| 97 | |
| 98 | |
| 99 def main(*argv): | |
| 100 instanceClip = Clip(*argv) | |
| 101 instanceClip.handle_io() | |
| 102 | |
| 103 if __name__ == "__main__": | |
| 104 args = Parser() | |
| 105 id = 0 | |
| 106 for inputfile in args.input: | |
| 107 main(inputfile, args.output, args.output_format, | |
| 108 args.adapter_to_clip, args.min, args.max, args.Nmode) |
