annotate src/genecluster_sequence/__init__.py @ 25:99435bb90725 draft

Uploaded
author bgruening
date Fri, 18 Oct 2013 03:35:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
99435bb90725 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
99435bb90725 Uploaded
bgruening
parents:
diff changeset
2
99435bb90725 Uploaded
bgruening
parents:
diff changeset
3 """
99435bb90725 Uploaded
bgruening
parents:
diff changeset
4 antiSMASH 2.0 output plugin to write all cluster proteins to a file (*_genecluster_proteins.fa)
99435bb90725 Uploaded
bgruening
parents:
diff changeset
5 """
99435bb90725 Uploaded
bgruening
parents:
diff changeset
6 import logging
99435bb90725 Uploaded
bgruening
parents:
diff changeset
7 import textwrap
99435bb90725 Uploaded
bgruening
parents:
diff changeset
8 from os import path
99435bb90725 Uploaded
bgruening
parents:
diff changeset
9 from antismash import utils
99435bb90725 Uploaded
bgruening
parents:
diff changeset
10
99435bb90725 Uploaded
bgruening
parents:
diff changeset
# Plugin metadata, defined as module-level attributes.
name = "genecluster_proteins"
short_description = "Output gene clusters as FASTA sequences"
# Output plugins are sorted by priority, lower numbers get run first
priority = 9
99435bb90725 Uploaded
bgruening
parents:
diff changeset
15
99435bb90725 Uploaded
bgruening
parents:
diff changeset
def write(seq_records, options):
    """Write all cluster proteins to a file

    The output file is named ``<id of the first record>_genecluster_proteins.fa``
    and is created inside ``options.outputfoldername``. For every CDS feature
    of every cluster, one FASTA entry is written with the header
    ``>locus_tag:protein_id cluster_type #cluster_number - product`` followed
    by the translation wrapped at 60 characters per line.

    Args:
        seq_records (iterable): An iterable containing Bio.SeqRecords
        options (argparse.Namespace): The options passed to the program;
            only ``outputfoldername`` is read here.

    If ``seq_records`` is empty there is no record id to name the file after,
    so nothing is written and the function returns immediately.

    NOTE(review): every CDS feature is expected to carry the ``locus_tag``,
    ``protein_id``, ``product`` and ``translation`` qualifiers; the lookups
    below are unguarded, so a missing qualifier will propagate as an error.
    """
    # Materialise first: the docstring promises "iterable", but the first
    # record is needed by index and the records are walked again afterwards.
    seq_records = list(seq_records)
    if not seq_records:
        logging.debug("No seq_records given, not writing cluster proteins")
        return

    basename = seq_records[0].id
    output_name = path.join(options.outputfoldername,
                            "%s_genecluster_proteins.fa" % basename)
    # Pass the argument separately so formatting only happens if debug
    # logging is actually enabled.
    logging.debug("Writing seq_records to %r", output_name)

    # The handle is only ever written to, so plain 'w' is sufficient.
    with open(output_name, 'w') as handle:
        for seq_record in seq_records:
            clusters = utils.get_cluster_features(seq_record)
            for cluster in clusters:
                clustertype = utils.get_cluster_type(cluster)
                clusternr = utils.get_cluster_number(cluster)
                for feature in utils.get_cluster_cds_features(cluster, seq_record):
                    qual = feature.qualifiers
                    fasta_header = '>%s:%s %s #%s - %s\n' % (
                        qual['locus_tag'][0],
                        qual['protein_id'][0],
                        clustertype,
                        clusternr,
                        qual['product'][0],
                    )
                    handle.write(fasta_header)
                    # Wrap the protein sequence at 60 characters per line.
                    wrapped = textwrap.wrap(qual['translation'][0], 60)
                    handle.write('%s\n' % '\n'.join(wrapped))