0
|
1 #Florent Angly
|
|
2 import sys
|
|
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
|
|
4
|
|
5 def main():
|
|
6 input_filename = sys.argv[1]
|
|
7 input_type = sys.argv[2] or 'sanger'
|
|
8 mate1_filename = sys.argv[3]
|
|
9 mate2_filename = sys.argv[4]
|
|
10 single1_filename = sys.argv[5]
|
|
11 single2_filename = sys.argv[6]
|
|
12
|
|
13 type = input_type
|
|
14 input = fastqNamedReader( open( input_filename, 'rb' ), format = type )
|
|
15 mate1_out = fastqWriter( open( mate1_filename, 'wb' ), format = type )
|
|
16 mate2_out = fastqWriter( open( mate2_filename, 'wb' ), format = type )
|
|
17 single1_out = fastqWriter( open( single1_filename, 'wb' ), format = type )
|
|
18 single2_out = fastqWriter( open( single2_filename, 'wb' ), format = type )
|
|
19 joiner = fastqJoiner( type )
|
|
20
|
|
21 i = None
|
|
22 skip_count = 0
|
|
23 found = {}
|
|
24 for i, read in enumerate( fastqReader( open( input_filename, 'rb' ), format = type ) ):
|
|
25
|
|
26 if read.identifier in found:
|
|
27 del found[read.identifier]
|
|
28 continue
|
|
29
|
|
30 mate1 = input.get( read.identifier )
|
|
31
|
|
32 mate2 = input.get( joiner.get_paired_identifier( mate1 ) )
|
|
33
|
|
34 if mate2:
|
|
35 # This is a mate pair
|
|
36 found[mate2.identifier] = None
|
|
37 if joiner.is_first_mate( mate1 ):
|
|
38 mate1_out.write( mate1 )
|
|
39 mate2_out.write( mate2 )
|
|
40 else:
|
|
41 mate1_out.write( mate2 )
|
|
42 mate2_out.write( mate1 )
|
|
43 else:
|
|
44 # This is a single
|
|
45 skip_count += 1
|
|
46 if joiner.is_first_mate( mate1 ):
|
|
47 single1_out.write( mate1 )
|
|
48 else:
|
|
49 single2_out.write( mate1 )
|
|
50
|
|
51 if i is None:
|
|
52 print "Your input file contained no valid FASTQ sequences."
|
|
53 else:
|
|
54 if skip_count:
|
|
55 print 'There were %i reads with no mate.' % skip_count
|
|
56 print 'De-interlaced %s pairs of sequences.' % ( (i - skip_count + 1)/2 )
|
|
57
|
|
58 input.close()
|
|
59 mate1_out.close()
|
|
60 mate2_out.close()
|
|
61 single1_out.close()
|
|
62 single2_out.close()
|
|
63
|
|
64
|
|
65 if __name__ == "__main__":
|
|
66 main()
|