comparison yac.py @ 0:2445856981a1 draft

Imported from capsule None
author drosofff
date Mon, 03 Nov 2014 09:34:45 -0500
parents
children e5ef40107f54
comparison
equal deleted inserted replaced
-1:000000000000 0:2445856981a1
1 #!/usr/bin/python
2 # yac = yet another clipper
3 # v 1.1.0 - 23-08-2014 - argparse implementation
4 # Usage yac.py $input $output $adapter_to_clip $min $max $Nmode
5 # Christophe Antoniewski <drosofff@gmail.com>
6
7 import sys, string, argparse
8
9 def Parser():
10 the_parser = argparse.ArgumentParser()
11 the_parser.add_argument('--input', action="store", type=str, help="input fastq file")
12 the_parser.add_argument('--output', action="store", type=str, help="output, clipped fasta file")
13 the_parser.add_argument('--adapter_to_clip', action="store", type=str, help="adapter sequence to clip")
14 the_parser.add_argument('--min', action="store", type=int, help="minimal size of clipped sequence to keep")
15 the_parser.add_argument('--max', action="store", type=int, help="maximal size of clipped sequence to keep")
16 the_parser.add_argument('--Nmode', action="store", type=str, choices=["accept", "reject"], help="accept or reject sequences with N for clipping")
17 args = the_parser.parse_args()
18 args.adapter_to_clip = args.adapter_to_clip.upper()
19 return args
20
21
22 class Clip:
23 def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
24 self.inputfile = inputfile
25 self.outputfile = outputfile
26 self.adapter = adapter
27 self.minsize = int(minsize)
28 self.maxsize = int(maxsize)
29 def motives (sequence):
30 '''return a list of motives for perfect (6nt) or imperfect (7nt with one mismatch) search on import string module'''
31 sequencevariants = [sequence[0:6]] # initializes the list with the 6mer perfect match
32 dicsubst= {"A":"TGCN", "T":"AGCN", "G":"TACN", "C":"GATN"}
33 for pos in enumerate(sequence[:6]):
34 for subst in dicsubst[pos[1]]:
35 sequencevariants.append(sequence[:pos[0]]+ subst + sequence[pos[0]+1:7])
36 return sequencevariants
37 self.adaptmotifs= motives(self.adapter)
38
39 def scanadapt(self, adaptmotives=[], sequence=""):
40 '''scans sequence for adapter motives'''
41 if sequence.rfind(adaptmotives[0]) != -1:
42 return sequence[:sequence.rfind(adaptmotives[0])]
43 for motif in adaptmotives[1:]:
44 if sequence.rfind(motif) != -1:
45 return sequence[:sequence.rfind(motif)]
46 return sequence
47
48 def clip_with_N (self):
49 '''clips adapter sequences from inputfile.
50 Reads containing N are retained.'''
51 iterator = 0
52 id = 0
53 F = open (self.inputfile, "r")
54 O = open (self.outputfile, "w")
55 for line in F:
56 iterator += 1
57 if iterator % 4 == 2:
58 trim = self.scanadapt (self.adaptmotifs, line.rstrip() )
59 if self.minsize <= len(trim) <= self.maxsize:
60 id += 1
61 print >> O, ">%i\n%s" % (id, trim)
62 F.close()
63 O.close()
64 def clip_without_N (self):
65 '''clips adapter sequences from inputfile.
66 Reads containing N are rejected.'''
67 iterator = 0
68 id = 0
69 F = open (self.inputfile, "r")
70 O = open (self.outputfile, "w")
71 for line in F:
72 iterator += 1
73 if iterator % 4 == 2:
74 trim = self.scanadapt (self.adaptmotifs, line.rstrip() )
75 if "N" in trim: continue
76 if self.minsize <= len(trim) <= self.maxsize:
77 id += 1
78 print >> O, ">%i\n%s" % (id, trim)
79 F.close()
80 O.close()
81
82 def __main__ (inputfile, outputfile, adapter, minsize, maxsize, Nmode):
83 instanceClip = Clip (inputfile, outputfile, adapter, minsize, maxsize)
84 if Nmode == "accept":
85 instanceClip.clip_with_N()
86 else:
87 instanceClip.clip_without_N()
88
89 if __name__ == "__main__" :
90 args = Parser()
91 __main__(args.input, args.output, args.adapter_to_clip, args.min, args.max, args.Nmode)