Mercurial > repos > bgruening > augustus
annotate extract_features.py @ 10:cb47e789ccaa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 4a8f640dec78899be470ddd1c436fc0d158c80f3
| author | bgruening |
|---|---|
| date | Fri, 10 May 2019 08:25:57 -0400 |
| parents | bcfe8e0731f8 |
| children | 66c8e9d8d1c4 |
| rev | line source |
|---|---|
| 1 | 1 #!/usr/bin/env python |
| 2 | |
|
10
cb47e789ccaa
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 4a8f640dec78899be470ddd1c436fc0d158c80f3
bgruening
parents:
5
diff
changeset
|
3 import argparse |
| 1 | 4 import sys |
| 5 import textwrap | |
| 6 | |
|
10
cb47e789ccaa
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 4a8f640dec78899be470ddd1c436fc0d158c80f3
bgruening
parents:
5
diff
changeset
|
7 |
def _write_fasta(handle, name, sequence):
    """Write one FASTA record to *handle*, wrapping the sequence at 80 columns."""
    handle.write('>%s\n%s\n' % (name, '\n'.join(textwrap.wrap(sequence, 80))))


def main( args ):
    """
    Extract the protein and coding sequences from an augustus gff/gtf file.

    The annotation is read from stdin. AUGUSTUS stores the sequences as
    comment lines ('# protein sequence = [...]' / '# coding sequence = [...]')
    that may span several comment lines until the closing ']'. Each completed
    sequence is written as a FASTA record named after the most recent
    '# start gene <name>' comment.

    args.protein   -- path of the protein FASTA output (skipped when empty/None)
    args.codingseq -- path of the coding-sequence FASTA output (skipped when
                      empty/None)

    Example file:
    HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 Parent=g1.t1
    HS04636 AUGUSTUS transcription_end_site 8857 8857 . + . Parent=g1.t1
    # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
    # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
    # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
    # end gene g1
    ###
    #
    # ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
    #
    # Predicted genes for sequence number 2 on both strands
    # start gene g2
    HS08198 AUGUSTUS gene 86 2344 1 + . ID=g2
    HS08198 AUGUSTUS transcript 86 2344 . + . ID=g2.t1;Parent=g2
    """
    # Comment prefix that opens each kind of sequence, paired with its output path.
    targets = (
        ('protein sequence = [', args.protein),
        ('coding sequence = [', args.codingseq),
    )
    # Fallback record name so a sequence seen before any 'start gene' comment
    # does not crash with an unbound variable.
    gene_name = 'unknown'
    outputs = []  # (prefix, open file handle) for every requested output
    pending = {}  # prefix -> fragments of a sequence whose ']' is still missing

    try:
        for prefix, path in targets:
            if path:
                outputs.append((prefix, open(path, 'w')))

        for line in sys.stdin:
            # protein- and coding-sequence are stored as comments
            if not line.startswith('#'):
                continue
            line = line[2:].strip()
            if line.startswith('start gene'):
                gene_name = line[11:].strip()

            closes = line.endswith(']')
            for prefix, handle in outputs:
                fragments = pending.get(prefix)
                if fragments is not None:
                    # Continuation line of a sequence opened on an earlier line.
                    if closes:
                        fragments.append(line[:-1])
                        _write_fasta(handle, gene_name, ''.join(fragments))
                        del pending[prefix]
                    else:
                        fragments.append(line)

                if line.startswith(prefix):
                    remainder = line[len(prefix):]
                    if closes:
                        # Whole sequence fits on one line.
                        _write_fasta(handle, gene_name, remainder[:-1])
                        pending.pop(prefix, None)
                    else:
                        # Track "in progress" explicitly: the opening line may
                        # carry no residues at all.
                        pending[prefix] = [remainder]
    finally:
        # Close whatever was opened, even if parsing or a later open() failed.
        for _, handle in outputs:
            handle.close()
| 86 | |
|
10
cb47e789ccaa
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 4a8f640dec78899be470ddd1c436fc0d158c80f3
bgruening
parents:
5
diff
changeset
|
87 |
if __name__ == '__main__':
    # Command-line entry point: both output paths are optional and are
    # handed to main() unchanged; the annotation itself is read from stdin.
    parser = argparse.ArgumentParser()
    for flags, description in (
        (('-p', '--protein'), 'Path to the protein file.'),
        (('-c', '--codingseq'), 'Path to the coding file.'),
    ):
        parser.add_argument(*flags, help=description)

    args = parser.parse_args()
    main( args )
