Mercurial > repos > iuc > snpeff
comparison gbk2fa.py @ 31:b7029a54f73e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
| author | iuc |
|---|---|
| date | Mon, 21 Oct 2024 13:55:59 +0000 |
| parents | 030fe29d4c47 |
| children | |
comparison
equal
deleted
inserted
replaced
| 30:0c8a8cd9dd5b | 31:b7029a54f73e |
|---|---|
| 4 | 4 |
| 5 from Bio import SeqIO | 5 from Bio import SeqIO |
| 6 | 6 |
| 7 | 7 |
| 8 def get_opener(gbk_filename): | 8 def get_opener(gbk_filename): |
| | 9 """Determines the appropriate opener for a given file, supporting |
| | 10 bzip2, gzip, or standard open. |
| | 11 """ |
| 9 try: | 12 try: |
| 10 bz2.open(gbk_filename).read(1) | 13 bz2.open(gbk_filename).read(1) |
| 11 return bz2.open | 14 return bz2.open |
| 12 except OSError: | 15 except OSError: |
| 13 pass | 16 pass |
| 16 return gzip.open | 19 return gzip.open |
| 17 except OSError: | 20 except OSError: |
| 18 return open | 21 return open |
| 19 | 22 |
| 20 | 23 |
| 21 parser = argparse.ArgumentParser() | 24 def main(): |
| 22 parser.add_argument( | 25 parser = argparse.ArgumentParser( |
| 23 "genbank_file", | 26 description="Convert GenBank files to FASTA format. " |
| 24 help="GenBank input file. Can be compressed with gzip or bzip2" | 27 "Supports gzip and bzip2 compressed files." |
| 25 ) | 28 ) |
| 26 parser.add_argument( | 29 parser.add_argument( |
| 27 "fasta_file", help="FASTA output datset" | 30 "genbank_file", |
| 28 ) | 31 help="GenBank input file. Can be compressed with gzip or bzip2" |
| 29 parser.add_argument( | 32 ) |
| 30 "--remove_version", action="store_true", | 33 parser.add_argument( |
| 31 help="Remove version number from NCBI form formatted accession numbers. " | 34 "fasta_file", |
| 32 "For example, this would convert 'B000657.2' to 'B000657'" | 35 help="FASTA output dataset" |
| 33 ) | 36 ) |
| 34 args = parser.parse_args() | 37 parser.add_argument( |
| | 38 "--remove_version", action="store_true", |
| | 39 help="Remove version number from NCBI formatted accession numbers. " |
| | 40 "For example, this converts 'B000657.2' to 'B000657'." |
| | 41 ) |
| | 42 args = parser.parse_args() |
| | 43 |
| | 44 gbk_open = get_opener(args.genbank_file) |
| | 45 with gbk_open(args.genbank_file, 'rt') as input_handle, \ |
| | 46 open(args.fasta_file, 'w') as output_handle: |
| | 47 for seq_record in SeqIO.parse(input_handle, 'genbank'): |
| | 48 if args.remove_version: |
| | 49 seq_id = seq_record.id.split('.')[0] |
| | 50 else: |
| | 51 seq_id = seq_record.id |
| | 52 print(f'Writing FASTA record: {seq_id}') |
| | 53 output_handle.write(f'>{seq_id}\n') |
| | 54 output_handle.write(f'{seq_record.seq}\n') |
| 35 | 55 |
| 36 | 56 |
| 37 gbk_open = get_opener(args.genbank_file) | 57 if __name__ == "__main__": |
| 38 with gbk_open(args.genbank_file, 'rt') as input_handle, \ | 58 main() |
| 39 open(args.fasta_file, 'w') as output_handle: | |
| 40 for seq_record in SeqIO.parse(input_handle, 'genbank'): | |
| 41 if args.remove_version: | |
| 42 seq_id = seq_record.id.split('.')[0] | |
| 43 else: | |
| 44 seq_id = seq_record.id | |
| 45 print('Writing FASTA record: {}'.format(seq_id)) | |
| 46 print('>' + seq_id, file=output_handle) | |
| 47 print(seq_record.seq, file=output_handle) |
