Mercurial > repos > drosofff > yac_clipper
comparison yac.py @ 0:2445856981a1 draft
Imported from capsule None
author | drosofff |
---|---|
date | Mon, 03 Nov 2014 09:34:45 -0500 |
parents | |
children | e5ef40107f54 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2445856981a1 |
---|---|
1 #!/usr/bin/python | |
2 # yac = yet another clipper | |
3 # v 1.1.0 - 23-08-2014 - argparse implementation | |
4 # Usage yac.py $input $output $adapter_to_clip $min $max $Nmode | |
5 # Christophe Antoniewski <drosofff@gmail.com> | |
6 | |
7 import sys, string, argparse | |
8 | |
9 def Parser(): | |
10 the_parser = argparse.ArgumentParser() | |
11 the_parser.add_argument('--input', action="store", type=str, help="input fastq file") | |
12 the_parser.add_argument('--output', action="store", type=str, help="output, clipped fasta file") | |
13 the_parser.add_argument('--adapter_to_clip', action="store", type=str, help="adapter sequence to clip") | |
14 the_parser.add_argument('--min', action="store", type=int, help="minimal size of clipped sequence to keep") | |
15 the_parser.add_argument('--max', action="store", type=int, help="maximal size of clipped sequence to keep") | |
16 the_parser.add_argument('--Nmode', action="store", type=str, choices=["accept", "reject"], help="accept or reject sequences with N for clipping") | |
17 args = the_parser.parse_args() | |
18 args.adapter_to_clip = args.adapter_to_clip.upper() | |
19 return args | |
20 | |
21 | |
22 class Clip: | |
23 def __init__(self, inputfile, outputfile, adapter, minsize, maxsize): | |
24 self.inputfile = inputfile | |
25 self.outputfile = outputfile | |
26 self.adapter = adapter | |
27 self.minsize = int(minsize) | |
28 self.maxsize = int(maxsize) | |
29 def motives (sequence): | |
30 '''return a list of motives for perfect (6nt) or imperfect (7nt with one mismatch) search on import string module''' | |
31 sequencevariants = [sequence[0:6]] # initializes the list with the 6mer perfect match | |
32 dicsubst= {"A":"TGCN", "T":"AGCN", "G":"TACN", "C":"GATN"} | |
33 for pos in enumerate(sequence[:6]): | |
34 for subst in dicsubst[pos[1]]: | |
35 sequencevariants.append(sequence[:pos[0]]+ subst + sequence[pos[0]+1:7]) | |
36 return sequencevariants | |
37 self.adaptmotifs= motives(self.adapter) | |
38 | |
39 def scanadapt(self, adaptmotives=[], sequence=""): | |
40 '''scans sequence for adapter motives''' | |
41 if sequence.rfind(adaptmotives[0]) != -1: | |
42 return sequence[:sequence.rfind(adaptmotives[0])] | |
43 for motif in adaptmotives[1:]: | |
44 if sequence.rfind(motif) != -1: | |
45 return sequence[:sequence.rfind(motif)] | |
46 return sequence | |
47 | |
48 def clip_with_N (self): | |
49 '''clips adapter sequences from inputfile. | |
50 Reads containing N are retained.''' | |
51 iterator = 0 | |
52 id = 0 | |
53 F = open (self.inputfile, "r") | |
54 O = open (self.outputfile, "w") | |
55 for line in F: | |
56 iterator += 1 | |
57 if iterator % 4 == 2: | |
58 trim = self.scanadapt (self.adaptmotifs, line.rstrip() ) | |
59 if self.minsize <= len(trim) <= self.maxsize: | |
60 id += 1 | |
61 print >> O, ">%i\n%s" % (id, trim) | |
62 F.close() | |
63 O.close() | |
64 def clip_without_N (self): | |
65 '''clips adapter sequences from inputfile. | |
66 Reads containing N are rejected.''' | |
67 iterator = 0 | |
68 id = 0 | |
69 F = open (self.inputfile, "r") | |
70 O = open (self.outputfile, "w") | |
71 for line in F: | |
72 iterator += 1 | |
73 if iterator % 4 == 2: | |
74 trim = self.scanadapt (self.adaptmotifs, line.rstrip() ) | |
75 if "N" in trim: continue | |
76 if self.minsize <= len(trim) <= self.maxsize: | |
77 id += 1 | |
78 print >> O, ">%i\n%s" % (id, trim) | |
79 F.close() | |
80 O.close() | |
81 | |
82 def __main__ (inputfile, outputfile, adapter, minsize, maxsize, Nmode): | |
83 instanceClip = Clip (inputfile, outputfile, adapter, minsize, maxsize) | |
84 if Nmode == "accept": | |
85 instanceClip.clip_with_N() | |
86 else: | |
87 instanceClip.clip_without_N() | |
88 | |
89 if __name__ == "__main__" : | |
90 args = Parser() | |
91 __main__(args.input, args.output, args.adapter_to_clip, args.min, args.max, args.Nmode) |