annotate RepEnrich_setup.py @ 9:8d4289b34639 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 18b0b149cdda97c8d01caeb6debb77002a3ac89f
author drosofff
date Wed, 31 May 2017 16:51:37 -0400
parents 041de602103e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
1 #!/usr/bin/env python
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
2 import argparse
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
3 import csv
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
4 import os
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
5 import shlex
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
6 import subprocess
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
7 import sys
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
8 from Bio import SeqIO
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
9 from Bio.Seq import Seq
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
10 from Bio.SeqRecord import SeqRecord
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
11 from Bio.Alphabet import IUPAC
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
12
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# Command-line interface.
# Three positional arguments (annotation file, genome fasta, setup folder)
# plus optional tuning flags.  Help/description texts are unchanged.
parser = argparse.ArgumentParser(
    description=(
        'Part I: Prepartion of repetive element psuedogenomes and repetive '
        'element bamfiles. This script prepares the annotation used by '
        'downstream applications to analyze for repetitive element '
        'enrichment. For this script to run properly bowtie must be loaded. '
        'The repeat element psuedogenomes are prepared in order to analyze '
        'reads that map to multiple locations of the genome. The repeat '
        'element bamfiles are prepared in order to use a region sorter to '
        'analyze reads that map to a single location of the genome.You will '
        '1) annotation_file: The repetitive element annotation file '
        'downloaded from RepeatMasker.org database for your organism of '
        'interest. 2) genomefasta: Your genome of interest in fasta format, '
        '3)setup_folder: a folder to contain repeat element setup files '
        'command-line usage EXAMPLE: python master_setup.py '
        '/users/nneretti/data/annotation/mm9/mm9_repeatmasker.txt '
        '/users/nneretti/data/annotation/mm9/mm9.fa '
        '/users/nneretti/data/annotation/mm9/setup_folder'
    ),
    prog='getargs_genome_maker.py',
)
parser.add_argument('--version', action='version', version='%(prog)s 0.1')
parser.add_argument(
    'annotation_file',
    action='store',
    metavar='annotation_file',
    help='List annotation file. The annotation file contains the repeat masker annotation for the genome of interest and may be downloaded at RepeatMasker.org Example /data/annotation/mm9/mm9.fa.out',
)
parser.add_argument(
    'genomefasta',
    action='store',
    metavar='genomefasta',
    help='File name and path for genome of interest in fasta format. Example /data/annotation/mm9/mm9.fa',
)
parser.add_argument(
    'setup_folder',
    action='store',
    metavar='setup_folder',
    help='List folder to contain bamfiles for repeats and repeat element psuedogenomes. Example /data/annotation/mm9/setup',
)
parser.add_argument(
    '--nfragmentsfile1',
    action='store',
    dest='nfragmentsfile1',
    metavar='nfragmentsfile1',
    default='./repnames_nfragments.txt',
    help='Output location of a description file that saves the number of fragments processed per repname. Default ./repnames_nfragments.txt',
)
# Numeric defaults are given as ints so they already have the type that
# ``type=int`` produces for user-supplied values.
parser.add_argument(
    '--gaplength',
    action='store',
    dest='gaplength',
    metavar='gaplength',
    default=200,
    type=int,
    help='Length of the spacer used to build repeat psuedogeneomes. Default 200',
)
parser.add_argument(
    '--flankinglength',
    action='store',
    dest='flankinglength',
    metavar='flankinglength',
    default=25,
    type=int,
    help='Length of the flanking region adjacent to the repeat element that is used to build repeat psuedogeneomes. The flanking length should be set according to the length of your reads. Default 25',
)
# Only the literal strings "TRUE" and "FALSE" are handled further down in
# the script; rejecting anything else here gives a usage error up front
# instead of a failure after the genome has been loaded.
parser.add_argument(
    '--is_bed',
    action='store',
    dest='is_bed',
    metavar='is_bed',
    default='FALSE',
    choices=['TRUE', 'FALSE'],
    help='Is the annotation file a bed file. This is also a compatible format. The file needs to be a tab seperated bed with optional fields. Ex. format chr\tstart\tend\tName_element\tclass\tfamily. The class and family should identical to name_element if not applicable. Default FALSE change to TRUE',
)
args = parser.parse_args()

# parameters and paths specified in args_parse
gapl = args.gaplength
flankingl = args.flankinglength
annotation_file = args.annotation_file
genomefasta = args.genomefasta
setup_folder = args.setup_folder
nfragmentsfile1 = args.nfragmentsfile1
is_bed = args.is_bed
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
32
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
33 ################################################################################
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# check that the programs we need are available
# Only ``bowtie --version`` is actually invoked; an OSError means the
# executable could not be started.  The exit status is not inspected.
try:
    # One null-device handle, closed deterministically, receives both
    # output streams of the probe command.
    with open(os.devnull, 'wb') as devnull:
        subprocess.call(shlex.split("bowtie --version"), stdout=devnull, stderr=devnull)
except OSError:
    print ("Error: Bowtie or BEDTools not loaded")
    raise
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
40
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
41 ################################################################################
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# Define a text importer
# Raise the csv field size limit so very long fields do not abort parsing.
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    """Yield the non-empty rows of a delimited text file.

    filename  -- path of the file to read (resolved with os.path.realpath).
    separator -- single-character field delimiter handed to csv.reader.

    Whitespace directly following a delimiter is ignored
    (``skipinitialspace=True``).  Rows that csv.reader returns as an
    empty list are skipped.  The file is closed when the generator is
    exhausted or closed.
    """
    with open(os.path.realpath(filename)) as handle:
        reader = csv.reader(handle, delimiter=separator, skipinitialspace=True)
        for row in reader:
            if row:
                yield row
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# Make a setup folder
# Attempt the creation directly rather than testing for existence first,
# so a folder created concurrently is tolerated.  A failure is only
# ignored when the path really is a directory afterwards; a regular file
# at that path (or any other error) is reported immediately.
try:
    os.makedirs(setup_folder)
except OSError:
    if not os.path.isdir(setup_folder):
        raise
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
52
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
53 ################################################################################
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# load genome into dictionary
print ("loading genome...")
g = SeqIO.to_dict(SeqIO.parse(genomefasta, "fasta"))

print ("Precomputing length of all chromosomes...")
idxgenome = {}  # sequence id -> position in allchrs
lgenome = {}    # sequence id -> sequence length
genome = {}     # sequence id -> sequence as a plain string
# Snapshot the ids as a list: a keys() view would keep the record
# dictionary alive after ``del g`` on Python 3.
allchrs = list(g)
for position, chrom in enumerate(allchrs):
    # Pop each record as it is converted so the record object and its
    # string copy are not both held for the whole genome.
    genome[chrom] = str(g.pop(chrom).seq)
    lgenome[chrom] = len(genome[chrom])
    idxgenome[chrom] = position
# The original loop left k equal to the number of sequences.
k = len(allchrs)
del g
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
71
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
72 ################################################################################
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
73 # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
74 if is_bed == "FALSE":
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
75 repeat_elements= []
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
76 fout = open(os.path.realpath(setup_folder + os.path.sep + 'repnames.bed'), 'w')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
77 fin = import_text(annotation_file, ' ')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
78 x = 0
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
79 rep_chr = {}
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
80 rep_start = {}
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
81 rep_end = {}
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
82 x = 0
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
83 for line in fin:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
84 if x>2:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
85 line9 = line[9].replace("(","_").replace(")","_").replace("/","_")
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
86 repname = line9
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
87 if not repname in repeat_elements:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
88 repeat_elements.append(repname)
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
89 repchr = line[4]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
90 repstart = int(line[5])
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
91 repend = int(line[6])
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
92 # print >> fout, str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname)
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
93 fout.write(str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname)+ '\n')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
94 # if rep_chr.has_key(repname):
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
95 if repname in rep_chr:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
96 rep_chr[repname].append(repchr)
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
97 rep_start[repname].append(int(repstart))
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
98 rep_end[repname].append(int(repend))
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
99 else:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
100 rep_chr[repname] = [repchr]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
101 rep_start[repname] = [int(repstart)]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
102 rep_end[repname] = [int(repend)]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
103 x +=1
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
104 if is_bed == "TRUE":
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
105 repeat_elements= []
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
106 fout = open(os.path.realpath(setup_folder + os.path.sep + 'repnames.bed'), 'w')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
107 fin = open(os.path.realpath(annotation_file), 'r')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
108 x =0
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
109 rep_chr = {}
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
110 rep_start = {}
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
111 rep_end = {}
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
112 x =0
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
113 for line in fin:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
114 line=line.strip('\n')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
115 line=line.split('\t')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
116 line3 = line[3].replace("(","_").replace(")","_").replace("/","_")
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
117 repname = line3
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
118 if not repname in repeat_elements:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
119 repeat_elements.append(repname)
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
120 repchr = line[0]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
121 repstart = int(line[1])
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
122 repend = int(line[2])
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
123 # print >> fout, str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname)
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
124 fout.write(str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname) + '\n')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
125 # if rep_chr.has_key(repname):
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
126 if repname in rep_chr:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
127 rep_chr[repname].append(repchr)
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
128 rep_start[repname].append(int(repstart))
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
129 rep_end[repname].append(int(repend))
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
130 else:
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
131 rep_chr[repname] = [repchr]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
132 rep_start[repname] = [int(repstart)]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
133 rep_end[repname] = [int(repend)]
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
134
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# Done with the input/output handles opened earlier in the script.
fin.close()
fout.close()
repeat_elements = sorted(repeat_elements)
print("Writing a key for all repeats...")
# Write the repeat key: one "<repeat name>\t<index>" line per repeat, in
# sorted name order, with indices counting up from 0.
key_path = os.path.realpath(setup_folder + os.path.sep + 'repgenomes_key.txt')
# The context manager closes the key file even if a write fails.
with open(key_path, 'w') as fout:
    for x, repeat in enumerate(repeat_elements):
        fout.write(str(repeat) + '\t' + str(x) + '\n')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
147 ################################################################################
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# generate spacer for pseudogenomes: a run of gapl "N" characters
# (string repetition replaces the character-by-character concatenation loop)
spacer = "N" * gapl
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
152
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# save file with number of fragments processed per repname
print("Saving number of fragments processed per repname to " + nfragmentsfile1)
# One "<fragment count>\t<repname>" line per repeat name; the count is the
# number of chromosome entries recorded for that name in rep_chr.
with open(os.path.realpath(nfragmentsfile1), "w") as fout1:
    for repname, fragment_chrs in rep_chr.items():
        fout1.write(str(len(fragment_chrs)) + "\t" + repname + '\n')
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
161
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
# generate metagenomes and save them to FASTA files
# For every repeat name: concatenate all of its fragments (each widened by
# flankingl on both sides, clamped to the chromosome bounds, and preceded by
# the spacer), write the result to <setup_folder>/<newname>.fa and index it
# with bowtie-build.
nrepgenomes = len(rep_chr)
for k, repname in enumerate(rep_chr, 1):
    # Characters that are awkward in file names are replaced by underscores.
    newname = repname.replace("(", "_").replace(")", "_").replace("/", "_")
    progress = " (" + str(k) + " of " + str(nrepgenomes) + ")"
    print("processing repgenome " + newname + ".fa" + progress)
    print("-------> " + str(len(rep_chr[repname])) + " fragments")

    # Collect the pieces in a list and join once; repeated string
    # concatenation is quadratic in the metagenome length.
    pieces = []
    fragments = zip(rep_chr[repname], rep_start[repname], rep_end[repname])
    for chrom, frag_start, frag_end in fragments:
        # Only the chromosome lookups can raise KeyError, so keep the try
        # body limited to them; fragments on unknown chromosomes are
        # reported and skipped.
        try:
            chrom_length = lgenome[chrom]
            chrom_sequence = genome[chrom]
        except KeyError:
            print("Unrecognised Chromosome: " + chrom)
            continue
        rstart = max(frag_start - flankingl, 0)
        rend = min(frag_end + flankingl, chrom_length - 1)
        pieces.append(spacer)
        pieces.append(chrom_sequence[rstart:(rend + 1)])
    metagenome = "".join(pieces)

    # Convert metagenome to SeqRecord object (required by SeqIO.write)
    # NOTE(review): the record id is the literal string "repname" for every
    # file, not the repeat's actual name -- confirm this is intended before
    # changing it, since it alters the FASTA headers.
    record = SeqRecord(Seq(metagenome, IUPAC.unambiguous_dna),
                       id="repname", name="", description="")
    print("saving repgenome " + newname + ".fa" + progress)
    fastafilename = os.path.realpath(setup_folder + os.path.sep + newname + ".fa")
    SeqIO.write(record, fastafilename, "fasta")

    print("indexing repgenome " + newname + ".fa" + progress)
    # Pass the arguments as a list so that paths or repeat names containing
    # whitespace or quote characters are not re-tokenised.
    command = ['bowtie-build', '-f', fastafilename,
               setup_folder + os.path.sep + newname]
    returncode = subprocess.call(command)
    if returncode != 0:
        # Report the failure instead of discarding it; processing continues
        # with the remaining repeats as before.
        print("bowtie-build exited with status " + str(returncode) +
              " for " + fastafilename)

print("... Done")
041de602103e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff changeset
194