annotate tools/sample_seqs/sample_seqs.py @ 1:369515d7738b draft

Uploaded v0.0.1 preview 2, correct typo in test
author peterjc
date Thu, 27 Mar 2014 09:39:49 -0400
parents 4c5b37848acb
children dc55e58fa890
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
2 """Sub-sample sequence from a FASTA, FASTQ or SFF file.
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
3
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
4 This tool is a short Python script which requires Biopython 1.62 or later
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
5 for SFF file support. If you use this tool in scientific work leading to a
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
6 publication, please cite the Biopython application note:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
7
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
8 Cock et al 2009. Biopython: freely available Python tools for computational
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
9 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
10 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
11
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
12 This script is copyright 2010-2013 by Peter Cock, The James Hutton Institute
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
13 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
14 See accompanying text file for licence details (MIT license).
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
15
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
16 This is version 0.1.0 of the script, use -v or --version to get the version.
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
17 """
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
18 import os
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
19 import sys
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
20
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
21 def stop_err(msg, err=1):
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
22 sys.stderr.write(msg.rstrip() + "\n")
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
23 sys.exit(err)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
24
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
25 if "-v" in sys.argv or "--version" in sys.argv:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
26 print("v0.1.0")
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
27 sys.exit(0)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
28
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
29 #Parse Command Line
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
30 if len(sys.argv) < 5:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
31 stop_err("Requires at least four arguments: seq_format, in_file, out_file, mode, ...")
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
32 seq_format, in_file, out_file, mode = sys.argv[1:5]
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
33 if in_file != "/dev/stdin" and not os.path.isfile(in_file):
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
34 stop_err("Missing input file %r" % in_file)
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
35
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
36 if mode == "everyNth":
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
37 if len(sys.argv) != 6:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
38 stop_err("If using everyNth, just need argument N (integer, at least 2)")
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
39 try:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
40 N = int(sys.argv[5])
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
41 except:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
42 stop_err("Bad N argument %r" % sys.argv[5])
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
43 if N < 2:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
44 stop_err("Bad N argument %r" % sys.argv[5])
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
45 if (N % 10) == 1:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
46 sys.stderr.write("Sampling every %ist sequence\n" % N)
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
47 elif (N % 10) == 2:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
48 sys.stderr.write("Sampling every %ind sequence\n" % N)
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
49 elif (N % 10) == 3:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
50 sys.stderr.write("Sampling every %ird sequence\n" % N)
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
51 else:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
52 sys.stderr.write("Sampling every %ith sequence\n" % N)
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
53 def sampler(iterator):
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
54 global N
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
55 count = 0
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
56 for record in iterator:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
57 count += 1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
58 if count % N == 1:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
59 yield record
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
60 elif mode == "percentage":
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
61 if len(sys.argv) != 6:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
62 stop_err("If using percentage, just need percentage argument (float, range 0 to 100)")
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
63 try:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
64 percent = float(sys.argv[5]) / 100.0
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
65 except:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
66 stop_err("Bad percent argument %r" % sys.argv[5])
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
67 if percent <= 0.0 or 1.0 <= percent:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
68 stop_err("Bad percent argument %r" % sys.argv[5])
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
69 sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent))
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
70 def sampler(iterator):
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
71 global percent
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
72 count = 0
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
73 taken = 0
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
74 for record in iterator:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
75 count += 1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
76 if percent * count > taken:
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
77 taken += 1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
78 yield record
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
79 else:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
80 stop_err("Unsupported mode %r" % mode)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
81
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
82 def raw_fasta_iterator(handle):
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
83 """Yields raw FASTA records as multi-line strings."""
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
84 while True:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
85 line = handle.readline()
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
86 if line == "":
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
87 return # Premature end of file, or just empty?
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
88 if line[0] == ">":
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
89 break
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
90
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
91 no_id_warned = False
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
92 while True:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
93 if line[0] != ">":
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
94 raise ValueError(
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
95 "Records in Fasta files should start with '>' character")
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
96 try:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
97 id = line[1:].split(None, 1)[0]
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
98 except IndexError:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
99 if not no_id_warned:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
100 sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n")
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
101 no_id_warned = True
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
102 id = None
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
103 lines = [line]
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
104 line = handle.readline()
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
105 while True:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
106 if not line:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
107 break
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
108 if line[0] == ">":
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
109 break
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
110 lines.append(line)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
111 line = handle.readline()
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
112 yield "".join(lines)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
113 if not line:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
114 return # StopIteration
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
115
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
116 def fasta_filter(in_file, out_file, iterator_filter):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
117 count = 0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
118 #Galaxy now requires Python 2.5+ so can use with statements,
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
119 with open(in_file) as in_handle:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
120 with open(out_file, "w") as pos_handle:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
121 for record in iterator_filter(raw_fasta_iterator(in_handle)):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
122 count += 1
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
123 pos_handle.write(record)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
124 return count
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
125
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
126 try:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
127 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
128 def fastq_filter(in_file, out_file, iterator_filter):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
129 count = 0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
130 #from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
131 reader = fastqReader(open(in_file, "rU"))
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
132 writer = fastqWriter(open(out_file, "w"))
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
133 for record in iterator_filter(reader):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
134 count += 1
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
135 writer.write(record)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
136 writer.close()
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
137 reader.close()
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
138 return count
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
139 except ImportError:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
140 from Bio.SeqIO.QualityIO import FastqGeneralIterator
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
141 def fastq_filter(in_file, out_file, iterator_filter):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
142 count = 0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
143 with open(in_file) as in_handle:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
144 with open(out_file, "w") as pos_handle:
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
145 for title, seq, qual in iterator_filter(FastqGeneralIterator(in_handle)):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
146 count += 1
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
147 pos_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
148 return count
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
149
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
150 def sff_filter(in_file, out_file, iterator_filter):
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
151 count = 0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
152 try:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
153 from Bio.SeqIO.SffIO import SffIterator, SffWriter
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
154 except ImportError:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
155 stop_err("SFF filtering requires Biopython 1.54 or later")
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
156 try:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
157 from Bio.SeqIO.SffIO import ReadRocheXmlManifest
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
158 except ImportError:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
159 #Prior to Biopython 1.56 this was a private function
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
160 from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
161 with open(in_file, "rb") as in_handle:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
162 try:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
163 manifest = ReadRocheXmlManifest(in_handle)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
164 except ValueError:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
165 manifest = None
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
166 in_handle.seek(0)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
167 with open(out_file, "wb") as out_handle:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
168 writer = SffWriter(out_handle, xml=manifest)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
169 in_handle.seek(0) #start again after getting manifest
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
170 count = writer.write_file(iterator_filter(SffIterator(in_handle)))
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
171 #count = writer.write_file(SffIterator(in_handle))
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
172 return count
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
173
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
174 if seq_format.lower()=="sff":
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
175 count = sff_filter(in_file, out_file, sampler)
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
176 elif seq_format.lower()=="fasta":
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
177 count = fasta_filter(in_file, out_file, sampler)
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
178 elif seq_format.lower().startswith("fastq"):
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
179 count = fastq_filter(in_file, out_file, sampler)
0
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
180 else:
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
181 stop_err("Unsupported file type %r" % seq_format)
4c5b37848acb Uploaded v0.0.1 preview 1
peterjc
parents:
diff changeset
182
1
369515d7738b Uploaded v0.0.1 preview 2, correct typo in test
peterjc
parents: 0
diff changeset
183 sys.stderr.write("Sampled %i records\n" % count)