annotate glimmer_gbk_to_orf.py @ 0:e7c43934d083 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author iuc
date Tue, 28 Nov 2017 09:54:34 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
1 #!/usr/bin/env python
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
2
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
3 ###################################################################
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
4 #
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
5 # gbk2orf.py by Errol Strain (estrain@gmail.com)
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
6 #
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
7 # Read a GenBank file and export fasta formatted amino acid and
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
8 # CDS files
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
9 #
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
10 ###################################################################
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
11
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
12 import sys
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
13 from optparse import OptionParser
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
14
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
15 from Bio import SeqIO
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
16 from Bio.Seq import Seq
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
17 from Bio.SeqRecord import SeqRecord
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
18
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
19
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
# Command line usage
usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta"
p = OptionParser(usage)
p.add_option("-t", "--translate", dest="transtabl", type="int", default=11,
             help="Translation table used to translate coding regions (default=11)")
p.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file")
p.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output")
p.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output")
(opts, args) = p.parse_args()

# Bare invocation: point the user at --help. p.error() prints the usage
# string plus the message to stderr and exits with status 2.
if len(sys.argv) == 1:
    p.error("Use --help to see usage")

# optparse has no concept of a required option, and the Values object it
# returns is always truthy, so testing "not opts" can never detect missing
# arguments.  Check the three file options explicitly so that a forgotten
# flag is reported here rather than as a TypeError from open(None) later.
missing = [
    flag
    for flag, given in (
        ("-g/--genbank", opts.gb_file),
        ("-a/--amino_acid", opts.aa_file),
        ("-n/--nucleotide", opts.orf_file),
    )
    if not given
]
if missing:
    p.error("Missing required option(s): " + ", ".join(missing))
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
34
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
# Lists to hold SeqRecords: translated proteins and CDS nucleotide sequences
aalist = []
nuclist = []

# If the CDS does not have a locus tag the name will be assigned using the
# order in which it was found.  The counter runs across all records in the
# input file; it is not reset per record.
feat_count = 0

# Iterate through genbank records in input file.  The handle is managed by a
# "with" block so it is closed once parsing finishes or fails.
with open(opts.gb_file, "r") as gb_handle:
    for gb_record in SeqIO.parse(gb_handle, "genbank"):
        for feature in gb_record.features:
            # Only coding sequences are exported.
            if feature.type != "CDS":
                continue
            feat_count += 1
            gene = feature.extract(gb_record.seq)
            if "locus_tag" in feature.qualifiers:
                value = feature.qualifiers["locus_tag"][0]
            else:
                value = "Index_" + str(feat_count)
            nuclist.append(SeqRecord(Seq(str(gene)), id=value, name=value))
            # to_stop=True ends the translation at the first in-frame stop
            # codon, so the protein record carries no stop symbol.
            pro = Seq(str(gene.translate(table=opts.transtabl, to_stop=True)))
            aalist.append(SeqRecord(pro, id=value, name=value))
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
56
e7c43934d083 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
iuc
parents:
diff changeset
# Write out lists in fasta format.  Each output file is opened in a "with"
# block so its handle is closed even if SeqIO.write raises.
with open(opts.aa_file, "w") as aa_handle:
    SeqIO.write(aalist, aa_handle, "fasta")
with open(opts.orf_file, "w") as orf_handle:
    SeqIO.write(nuclist, orf_handle, "fasta")