comparison generate_sliding_windows.py @ 0:559cf4ca1f2d draft

Uploaded
author mvdbeek
date Wed, 15 Apr 2015 06:34:23 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:559cf4ca1f2d
1 #!/usr/bin/env python
2 from Bio import SeqIO
3 import argparse
4 import sys
5
def generate_windows(seq, window, step):
    '''
    Generates windows of a sequence, with the distance of windows
    defined by *step*.

    seq -- string to split into windows.
    window -- integer specifying the size of the generated fragments.
    step -- integer specifying the distance between adjacent fragments.

    Yields tuples of (fragment, start, stop), where start and stop are
    1-based, inclusive coordinates of the fragment within *seq*.
    Only full-length windows are produced; a sequence shorter than
    *window* yields nothing.
    '''
    # The upper bound is len(seq) + 1 so that a window ending exactly on the
    # last character of the sequence is included as well.
    for stop in range(window, len(seq) + 1, step):
        start = stop - window
        # start + 1 converts the 0-based slice start to a 1-based coordinate;
        # the exclusive slice end equals the 1-based inclusive stop.
        yield (seq[start:stop], start + 1, stop)
23
24
def write_fragment(description, output_handle, fragment, start, stop):
    '''
    Write a single fragment to *output_handle* as a fasta record.

    The header line is built from the description and the start/stop
    coordinates; the fragment itself follows on the next line.
    '''
    header_fields = (description, start, stop, fragment)
    output_handle.write(">{0}_start:{1}_stop:{2}\n{3}\n".format(*header_fields))
29
30
def handle_io(input, output, window=21, step=21):
    '''
    Split every record of a fasta file into windows and write them out
    as a multi-fasta.

    Keyword arguments:
    input -- file handle for fasta file containing sequences for which you wish to generate fragments.
    output -- file handle for the multi-fasta that will contain the generated fragments.
    window -- integer specifying the size of the fragments.
    step -- integer specifying the distance between adjacent fragments.

    Both handles are closed before returning, also when parsing or
    writing raises an exception.
    '''
    try:
        try:
            for entry in SeqIO.parse(input, "fasta"):
                seq = str(entry.seq)
                description = str(entry.description)
                # Plain loop: fragments are streamed straight to the output
                # instead of collecting a throwaway list of return values.
                for fragment, start, stop in generate_windows(seq, window, step):
                    write_fragment(description, output, fragment, start, stop)
        finally:
            output.close()
    finally:
        # Nested so the input handle is released even if closing the
        # output handle fails.
        input.close()
47
def positive_int(val):
    '''
    Convert *val* to an integer and make sure it is greater than zero.

    Intended as a *type* callable for argparse arguments.

    val -- value (usually a command line string) to convert.

    Returns the converted integer.
    Raises argparse.ArgumentTypeError if *val* cannot be converted to an
    integer or if the integer is not positive.
    '''
    message = "'%s' is not a valid positive int" % val
    try:
        number = int(val)
    except (TypeError, ValueError):
        raise argparse.ArgumentTypeError(message)
    # Explicit check instead of assert, which is stripped when running with -O.
    if number <= 0:
        raise argparse.ArgumentTypeError(message)
    return number
54
if __name__ == "__main__":

    # Command line interface: one required input fasta, an optional output
    # (stdout when omitted) and two positive integers for window and step.
    cli = argparse.ArgumentParser(
        description='Generate fixed size windows in fasta format from multi-fasta sequence.')
    cli.add_argument(
        '--input',
        type=argparse.FileType('r'),
        required=True,
        help='supply an input multi-fasta file.')
    cli.add_argument(
        '--output',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help='supply an output multi-fasta file. If not specified use stdout.')
    # --window and --step share the same type and default; only the flag
    # and help text differ.
    for flag, help_text in (('--window', 'Set the size of the generated windows'),
                            ('--step', 'Set distance between the windows')):
        cli.add_argument(flag, type=positive_int, default=21, help=help_text)
    options = cli.parse_args()

    handle_io(options.input, options.output, options.window, options.step)