Mercurial > repos > p.lucas > extract_fasta_subsequence
comparison exseq_multi.py @ 0:746e286edff0 draft
Uploaded
| author | p.lucas |
|---|---|
| date | Wed, 26 Sep 2018 08:26:17 -0400 |
| parents | |
| children | 612269711364 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:746e286edff0 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 #-*- coding: utf-8 -*- | |
| 3 | |
| 4 """ | |
| 5 | |
| 6 Script ayant pour rôle l'extraction d'une sous-séquence (positions début/fin) de chaque séquence contenue dans un fichier multifasta. | |
| 7 Réalisé par Pierrick Lucas. | |
| 8 Usage : python exseq_multi.py --input_file sequences.fasta --start_position 0 --end_position 100 --output_file subsequences.fasta | |
| 9 | |
| 10 """ | |
| 11 | |
| 12 # Importation des librairies. | |
| 13 import os, sys, argparse, re | |
| 14 from Bio import SeqIO | |
| 15 | |
| 16 | |
| 17 ##### MAIN | |
def __main__():
    """Extract the same [start:end] slice from every record of a multi-FASTA file.

    Command-line options:
        -i / --input_file      multi-FASTA file to read.
        -s / --start_position  slice start; used directly as a Python slice
                               index (0-based, inclusive).
        -e / --end_position    slice end; used directly as a Python slice
                               index (exclusive).
        -o / --output_file     output path. When omitted, the name is built
                               from the input file name (extension removed)
                               and the two positions.

    Each record is written as a FASTA entry: a ``>`` header line carrying the
    record id, followed by the extracted subsequence on one line.

    Prints the help and exits with status 1 when called without arguments or
    when one of the mandatory options (-i, -s, -e) is missing. Exits through
    ``parser.error`` when a position is not an integer.
    """
    # Options:
    parser = argparse.ArgumentParser(description='''Extract sequence of multifasta file from position start/end.''', epilog='''This script need few options, use -h to see it.''')
    parser.add_argument('-i', '--input_file', dest='infile', help='Multifasta file.')
    parser.add_argument('-s', '--start_position', dest='start', help='Start position to extract.')
    parser.add_argument('-e', '--end_position', dest='end', help='End position to extract.')
    parser.add_argument('-o', '--output_file', dest='outfile', help='Output file.')

    # No argument at all: show the help instead of an argparse error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Get options:
    options = parser.parse_args()
    infile = options.infile
    spos = options.start
    epos = options.end
    outfile = options.outfile

    # Checking the parsed values (rather than counting sys.argv entries)
    # also accepts the "--option=value" spelling.
    if infile is None or spos is None or epos is None:
        parser.print_help()
        sys.exit(1)

    # Convert the positions once, before any file is created, so that a bad
    # value does not leave an empty output file behind.
    try:
        start = int(spos)
        end = int(epos)
    except ValueError:
        parser.error("start and end positions must be integers")

    # Default output name keeps the positions exactly as typed by the user.
    if outfile is None:
        stem = os.path.splitext(infile)[0]
        outfile = stem + "_extract_subseq_" + spos + "_to_" + epos + ".fasta"

    # The input is opened first so a missing input file fails before the
    # output is created; both handles are closed even if parsing raises.
    # Plain "r" is used because the "U" flag was removed in Python 3.11.
    with open(infile, "r") as inf, open(outfile, "w") as out:
        for rec in SeqIO.parse(inf, "fasta"):
            # A FASTA header line must start with ">".
            out.write(">" + rec.id + "\n")
            out.write(str(rec.seq[start:end]) + "\n")


#### MAIN END
if __name__ == "__main__":
    __main__()
