| 
0
 | 
     1 #!/usr/bin/python
 | 
| 
 | 
     2 #-*- coding: utf-8 -*-
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 """
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 Script that extracts, from every record of a multi-FASTA file, the subsequence located between a start and an end position.
 | 
| 
 | 
     7 Written by Pierrick Lucas.
 | 
| 
 | 
     8 Usage: python Extract_all_segment.py --input_file sequences.fasta --start_position 0 --end_position 100 [--output_file subseq.fasta]
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 """
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 # Importation des librairies.
 | 
| 
 | 
    13 import os, sys, argparse, re
 | 
| 
 | 
    14 from Bio import SeqIO
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 ##### MAIN
 | 
| 
 | 
    18 def __main__():
 | 
| 
 | 
    19   # Options :
 | 
| 
 | 
    20   parser = argparse.ArgumentParser(description='''Extract sequence of multifasta file from position start/end.''', epilog='''This script need few options, use -h to see it.''')
 | 
| 
 | 
    21   parser.add_argument('-i', '--input_file', dest='infile', help='Multifasta file.')
 | 
| 
 | 
    22   parser.add_argument('-s', '--start_position', dest='start', help='Start position to extract.')
 | 
| 
 | 
    23   parser.add_argument('-e', '--end_position', dest='end', help='End position to extract.')
 | 
| 
 | 
    24   parser.add_argument('-o', '--output_file', dest='outfile', help='Output file.')
 | 
| 
 | 
    25 
 | 
| 
 | 
    26   # Error :
 | 
| 
 | 
    27   if len(sys.argv)==1 or len(sys.argv)>9 or len(sys.argv)<7 :
 | 
| 
 | 
    28     parser.print_help()
 | 
| 
 | 
    29     sys.exit(1)
 | 
| 
 | 
    30 
 | 
| 
 | 
    31   # Get options :
 | 
| 
 | 
    32   options = parser.parse_args()
 | 
| 
 | 
    33   infile = options.infile
 | 
| 
 | 
    34   spos = options.start
 | 
| 
 | 
    35   epos = options.end
 | 
| 
 | 
    36   outfile = options.outfile
 | 
| 
 | 
    37 
 | 
| 
 | 
    38   # Variables :
 | 
| 
 | 
    39   if outfile is None:
 | 
| 
 | 
    40     withoutext, justext = os.path.splitext(infile)
 | 
| 
 | 
    41     outputfile = open(withoutext+"_extract_subseq_"+spos+"_to_"+epos+".fasta","w")
 | 
| 
 | 
    42   else:
 | 
| 
 | 
    43     outputfile = open(outfile,"w")
 | 
| 
 | 
    44     
 | 
| 
 | 
    45   # Ouputs :
 | 
| 
 | 
    46   with open(infile, "rU") as inf:
 | 
| 
 | 
    47     # Input treatment :
 | 
| 
 | 
    48     for rec in SeqIO.parse(inf, "fasta"):
 | 
| 
6
 | 
    49       outputfile.write('>'+rec.id+'\n')
 | 
| 
0
 | 
    50       outputfile.write(str(rec.seq[int(spos):int(epos)])+'\n')    
 | 
| 
 | 
    51 
 | 
| 
 | 
    52   # Fermeture des fichiers.               
 | 
| 
 | 
    53   outputfile.close()
 | 
| 
 | 
    54 
 | 
| 
 | 
    55 #### MAIN END
 | 
| 
 | 
    56 if __name__ == "__main__": __main__()
 |