Mercurial > repos > drosofff > yet_another_clipper
comparison YAC/yac.py @ 5:ad813be00215 draft default tip
Uploaded
| author | drosofff |
|---|---|
| date | Sat, 31 May 2014 15:12:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 4:2f536ef15f49 | 5:ad813be00215 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # yac = yet another clipper | |
| 3 # v 1.0.0 | |
| 4 # Usage yac.py $input $output $adapter_to_clip $min $max $Nmode | |
| 5 # Christophe Antoniewski <drosofff@gmail.com> | |
| 6 | |
| 7 import sys, string | |
| 8 | |
class Clip:
    """Clip adapter sequences from reads and write the kept reads out.

    The input file is read as consecutive 4-line records (FASTQ-style
    layout); only the second line of each record, the sequence, is used.
    Kept reads are written as numbered two-line entries (">1", ">2", ...).
    """

    def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
        """Store the settings and precompute the adapter search motifs.

        inputfile  -- path of the file holding the reads
        outputfile -- path of the file to write clipped reads to
        adapter    -- adapter sequence to clip
        minsize    -- minimum length of a clipped read to keep (int or str)
        maxsize    -- maximum length of a clipped read to keep (int or str)

        Raises KeyError if one of the first six adapter characters is not
        one of A, T, G or C (see _motives).
        """
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.adaptmotifs = self._motives(self.adapter)

    @staticmethod
    def _motives(sequence):
        """Return the list of motifs used to locate the adapter in a read.

        The first item is the perfect 6-nt prefix of the adapter.  It is
        followed by every variant of the 7-nt prefix that carries exactly
        one substitution (another base or N) within its first six positions.
        """
        substitutions = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}
        variants = [sequence[0:6]]  # perfect match goes first: it has priority
        for pos, base in enumerate(sequence[:6]):
            for subst in substitutions[base]:
                variants.append(sequence[:pos] + subst + sequence[pos + 1:7])
        return variants

    def scanadapt(self, adaptmotives=None, sequence=""):
        """Return sequence truncated just before the adapter, if one is found.

        Motifs are tried in list order and the first motif present in the
        sequence wins; the cut is made at its rightmost occurrence.  The
        sequence is returned unchanged when no motif matches or when no
        motifs are supplied.
        """
        if not adaptmotives:
            return sequence
        for motif in adaptmotives:
            cut = sequence.rfind(motif)
            if cut != -1:
                return sequence[:cut]
        return sequence

    def _clip(self, reject_n):
        """Clip every read of inputfile and write the kept ones to outputfile.

        A clipped read is kept when its length lies within
        [minsize, maxsize]; when reject_n is true, clipped reads that still
        contain an "N" are discarded as well.
        """
        kept = 0
        # Context managers guarantee both files are closed even on error.
        with open(self.inputfile, "r") as reads:
            with open(self.outputfile, "w") as out:
                for line_number, line in enumerate(reads, 1):
                    # Only the second line of each 4-line record is a sequence.
                    if line_number % 4 != 2:
                        continue
                    trim = self.scanadapt(self.adaptmotifs, line.rstrip())
                    if reject_n and "N" in trim:
                        continue
                    if self.minsize <= len(trim) <= self.maxsize:
                        kept += 1
                        # write() instead of the Python 2-only "print >>" form.
                        out.write(">%i\n%s\n" % (kept, trim))

    def clip_with_N(self):
        """Clip adapter sequences from inputfile.

        Reads containing N are retained.
        """
        self._clip(reject_n=False)

    def clip_without_N(self):
        """Clip adapter sequences from inputfile.

        Reads containing N are rejected.
        """
        self._clip(reject_n=True)
| 68 | |
def __main__(inputfile, outputfile, adapter, minsize, maxsize, Nmode):
    """Build a Clip for the given settings and run the requested clipping.

    When Nmode is exactly "accept", clipped reads containing N are kept
    (clip_with_N); any other value runs clip_without_N.
    """
    clipper = Clip(inputfile, outputfile, adapter, minsize, maxsize)
    run = clipper.clip_with_N if Nmode == "accept" else clipper.clip_without_N
    run()
| 75 | |
if __name__ == "__main__":
    # Six positional arguments are required; any extra ones are ignored.
    # Fail with the usage line rather than a bare IndexError traceback.
    if len(sys.argv) < 7:
        sys.exit("Usage: yac.py $input $output $adapter_to_clip $min $max $Nmode")
    # Named input_path/output_path to avoid shadowing the builtin input().
    input_path, output_path, adapter, minsize, maxsize, Nmode = sys.argv[1:7]
    __main__(input_path, output_path, adapter, minsize, maxsize, Nmode)
