annotate src/genecluster_sequence/__init__.py @ 23:d26e8aa37ce9 draft

Uploaded
author bgruening
date Mon, 14 Oct 2013 04:36:52 -0400
parents 225d40beff1a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
225d40beff1a Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
225d40beff1a Uploaded
bgruening
parents:
diff changeset
2
225d40beff1a Uploaded
bgruening
parents:
diff changeset
3 """
225d40beff1a Uploaded
bgruening
parents:
diff changeset
4 antiSMASH 2.0 output plugin to write all cluster proteins to a file (*_genecluster_proteins.fa)
225d40beff1a Uploaded
bgruening
parents:
diff changeset
5 """
225d40beff1a Uploaded
bgruening
parents:
diff changeset
6 import logging
225d40beff1a Uploaded
bgruening
parents:
diff changeset
7 from os import path
225d40beff1a Uploaded
bgruening
parents:
diff changeset
8 from antismash import utils
225d40beff1a Uploaded
bgruening
parents:
diff changeset
9
225d40beff1a Uploaded
bgruening
parents:
diff changeset
# Metadata describing this antiSMASH output plugin.
name = "genecluster_proteins"
short_description = "Output gene clusters as FASTA sequences"
# Output plugins are sorted by priority, lower numbers get run first
priority = 9
225d40beff1a Uploaded
bgruening
parents:
diff changeset
14
225d40beff1a Uploaded
bgruening
parents:
diff changeset
def _first_qualifier(qualifiers, key, default):
    """Return the first value stored under ``key`` in ``qualifiers``.

    Falls back to ``default`` when the key is absent or its value list is
    empty, so a sparsely annotated feature cannot abort the whole output.
    """
    values = qualifiers.get(key)
    if not values:
        return default
    return values[0]


def write(seq_records, options):
    """Write all cluster proteins to a FASTA file

    The file is created in ``options.outputfoldername`` and is named
    ``<id of the first record>_genecluster_proteins.fa``. One FASTA entry is
    written per CDS feature of every cluster; its header has the form
    ``>locus_tag:protein_id clustertype #clusternr - product``.

    Missing ``locus_tag``, ``protein_id`` or ``product`` qualifiers are
    replaced by the placeholder ``unknown``. CDS features without a
    ``translation`` qualifier are skipped, as there is no protein sequence
    to write for them.

    Args:
        seq_records (sequence): A sequence containing Bio.SeqRecords. It has
            to support indexing, because the id of the first record is used
            to name the output file. If it is empty, no file is written.
        options (argparse.Namespace): The options passed to the program
    """
    if not seq_records:
        # Without a first record there is no basename for the output file.
        logging.debug("No seq_records given, not writing cluster proteins")
        return

    basename = seq_records[0].id
    output_name = path.join(options.outputfoldername,
                            "%s_genecluster_proteins.fa" % basename)
    logging.debug("Writing seq_records to %r", output_name)

    # The file is only written, never read back, so plain 'w' is sufficient.
    with open(output_name, 'w') as handle:
        for seq_record in seq_records:
            for cluster in utils.get_cluster_features(seq_record):
                clustertype = utils.get_cluster_type(cluster)
                clusternr = utils.get_cluster_number(cluster)
                for feature in utils.get_cluster_cds_features(cluster, seq_record):
                    qual = feature.qualifiers
                    locus_tag = _first_qualifier(qual, 'locus_tag', 'unknown')

                    translation = _first_qualifier(qual, 'translation', None)
                    if not translation:
                        logging.warning("Skipping CDS %r of cluster %s: "
                                        "no translation available",
                                        locus_tag, clusternr)
                        continue

                    fasta_header = '>%s:%s %s #%s - %s\n' % (
                        locus_tag,
                        _first_qualifier(qual, 'protein_id', 'unknown'),
                        clustertype,
                        clusternr,
                        _first_qualifier(qual, 'product', 'unknown'))
                    handle.write(fasta_header)
                    handle.write('%s\n' % translation)
225d40beff1a Uploaded
bgruening
parents:
diff changeset
37
225d40beff1a Uploaded
bgruening
parents:
diff changeset
38