Mercurial > repos > drosofff > repenrich
annotate RepEnrich_setup.py @ 3:89968ee4bf95 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 114b47cc624e39b4f485c8623458fc98494c564d
author | drosofff |
---|---|
date | Mon, 29 May 2017 13:12:29 -0400 |
parents | 041de602103e |
children |
rev | line source |
---|---|
0
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/env python |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
2 import argparse |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
3 import csv |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
4 import os |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
5 import shlex |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
6 import subprocess |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
7 import sys |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
8 from Bio import SeqIO |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
9 from Bio.Seq import Seq |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
10 from Bio.SeqRecord import SeqRecord |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
11 from Bio.Alphabet import IUPAC |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
12 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Command-line interface: three positional paths (annotation, genome fasta,
# output folder) plus optional tuning flags.
parser = argparse.ArgumentParser(description='Part I: Prepartion of repetive element psuedogenomes and repetive element bamfiles. This script prepares the annotation used by downstream applications to analyze for repetitive element enrichment. For this script to run properly bowtie must be loaded. The repeat element psuedogenomes are prepared in order to analyze reads that map to multiple locations of the genome. The repeat element bamfiles are prepared in order to use a region sorter to analyze reads that map to a single location of the genome.You will 1) annotation_file: The repetitive element annotation file downloaded from RepeatMasker.org database for your organism of interest. 2) genomefasta: Your genome of interest in fasta format, 3)setup_folder: a folder to contain repeat element setup files command-line usage EXAMPLE: python master_setup.py /users/nneretti/data/annotation/mm9/mm9_repeatmasker.txt /users/nneretti/data/annotation/mm9/mm9.fa /users/nneretti/data/annotation/mm9/setup_folder', prog='getargs_genome_maker.py')
parser.add_argument('--version', action='version', version='%(prog)s 0.1')
parser.add_argument('annotation_file', action='store', metavar='annotation_file', help='List annotation file. The annotation file contains the repeat masker annotation for the genome of interest and may be downloaded at RepeatMasker.org Example /data/annotation/mm9/mm9.fa.out')
parser.add_argument('genomefasta', action='store', metavar='genomefasta', help='File name and path for genome of interest in fasta format. Example /data/annotation/mm9/mm9.fa')
parser.add_argument('setup_folder', action='store', metavar='setup_folder', help='List folder to contain bamfiles for repeats and repeat element psuedogenomes. Example /data/annotation/mm9/setup')
parser.add_argument('--nfragmentsfile1', action='store', dest='nfragmentsfile1', metavar='nfragmentsfile1', default='./repnames_nfragments.txt', help='Output location of a description file that saves the number of fragments processed per repname. Default ./repnames_nfragments.txt')
# Integer defaults are given as ints; argparse only applies type= to values
# that arrive as strings, so the parsed result is the same int either way.
parser.add_argument('--gaplength', action='store', dest='gaplength', metavar='gaplength', default=200, type=int, help='Length of the spacer used to build repeat psuedogeneomes. Default 200')
parser.add_argument('--flankinglength', action='store', dest='flankinglength', metavar='flankinglength', default=25, type=int, help='Length of the flanking region adjacent to the repeat element that is used to build repeat psuedogeneomes. The flanking length should be set according to the length of your reads. Default 25')
# The script only compares is_bed against the exact strings "TRUE" and
# "FALSE"; restricting the choices rejects any other spelling right here,
# before the genome is loaded, instead of failing much later.
parser.add_argument('--is_bed', action='store', dest='is_bed', metavar='is_bed', default='FALSE', choices=['TRUE', 'FALSE'], help='Is the annotation file a bed file. This is also a compatible format. The file needs to be a tab seperated bed with optional fields. Ex. format chr\tstart\tend\tName_element\tclass\tfamily. The class and family should identical to name_element if not applicable. Default FALSE change to TRUE')
args = parser.parse_args()

# parameters and paths specified in args_parse
gapl = args.gaplength                    # int: spacer length
flankingl = args.flankinglength          # int: flanking length
annotation_file = args.annotation_file   # path to the repeat annotation
genomefasta = args.genomefasta           # path to the genome fasta
setup_folder = args.setup_folder         # output folder
nfragmentsfile1 = args.nfragmentsfile1   # path of the fragments-per-repname report
is_bed = args.is_bed                     # "TRUE" or "FALSE" (string, not bool)
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
32 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
33 ################################################################################ |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# check that the programs we need are available
# Only the ability to launch bowtie is probed: subprocess raises OSError when
# the executable cannot be started. The exit status and all output are ignored.
try:
    # A single null-device handle serves both streams and is closed as soon as
    # the probe returns, so no file descriptors are left open.
    with open(os.devnull, 'wb') as devnull:
        subprocess.call(shlex.split("bowtie --version"), stdout=devnull, stderr=devnull)
except OSError:
    print ("Error: Bowtie or BEDTools not loaded")
    raise
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
40 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
41 ################################################################################ |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Define a text importer
# Raise the csv module's per-field size limit to the largest value it accepts.
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    """Yield the non-empty rows of a delimited text file.

    Args:
        filename: Path of the file to read; resolved with os.path.realpath.
        separator: Single-character field delimiter handed to csv.reader.

    Yields:
        One list of string fields per row. Rows that csv.reader returns as
        empty lists (blank lines) are skipped. Whitespace directly after a
        delimiter is dropped (skipinitialspace=True).

    The file is opened when iteration starts and is closed when the generator
    is exhausted or explicitly closed.
    """
    with open(os.path.realpath(filename)) as handle:
        reader = csv.reader(handle, delimiter=separator, skipinitialspace=True)
        for line in reader:
            if line:
                yield line
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Make a setup folder
# Attempt the creation directly rather than testing for existence first, so a
# folder that appears between a check and the creation cannot cause a failure.
try:
    os.makedirs(setup_folder)
except OSError:
    # An already existing directory is fine; any other failure (permissions,
    # a regular file occupying the path, ...) is reported to the caller.
    if not os.path.isdir(setup_folder):
        raise
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
52 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
53 ################################################################################ |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# load genome into dictionary
print ("loading genome...")
g = SeqIO.to_dict(SeqIO.parse(genomefasta, "fasta"))

print ("Precomputing length of all chromosomes...")
idxgenome = {}   # sequence name -> position in allchrs
lgenome = {}     # sequence name -> sequence length
genome = {}      # sequence name -> sequence as a plain str
# Snapshot the names into a list. A keys() view would keep the record
# dictionary alive after "del g" and would forbid removing entries from it
# while looping.
allchrs = list(g)
for k, chrom in enumerate(allchrs):
    # pop() drops each parsed record once its sequence has been copied out,
    # so the records and the plain strings are not all held at the same time.
    genome[chrom] = str(g.pop(chrom).seq)
    lgenome[chrom] = len(genome[chrom])
    idxgenome[chrom] = k
# Leave k equal to the number of sequences processed.
k = len(allchrs)
del g
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
71 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
72 ################################################################################ |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter
#
# Outputs of this section:
#   <setup_folder>/repnames.bed  one "chr<TAB>start<TAB>end<TAB>name" line per repeat
#   repeat_elements              repeat names in order of first appearance
#   rep_chr/rep_start/rep_end    name -> parallel lists of chromosome/start/end
if is_bed not in ("TRUE", "FALSE"):
    # Neither input format would be read; stop with an explicit message.
    raise ValueError("is_bed must be 'TRUE' or 'FALSE', got %r" % (is_bed,))

repeat_elements = []
rep_chr = {}
rep_start = {}
rep_end = {}
x = 0


def _clean_repname(raw_name):
    """Return raw_name with every '(', ')' and '/' replaced by '_'."""
    return raw_name.replace("(", "_").replace(")", "_").replace("/", "_")


def _record_repeat(bed_out, repname, repchr, repstart, repend):
    """Write one BED line to bed_out and index the occurrence under repname."""
    bed_out.write('%s\t%d\t%d\t%s\n' % (repchr, repstart, repend, repname))
    # rep_chr gains a key exactly when a name is seen for the first time, so
    # the dictionary doubles as a constant-time "already known" test.
    if repname not in rep_chr:
        repeat_elements.append(repname)
        rep_chr[repname] = []
        rep_start[repname] = []
        rep_end[repname] = []
    rep_chr[repname].append(repchr)
    rep_start[repname].append(repstart)
    rep_end[repname].append(repend)


with open(os.path.realpath(setup_folder + os.path.sep + 'repnames.bed'), 'w') as fout:
    if is_bed == "FALSE":
        # Space-separated annotation: name in column 9, chromosome in column 4,
        # start/end in columns 5 and 6 (0-based field indexes).
        fin = import_text(annotation_file, ' ')
        try:
            for line in fin:
                # The first three non-empty rows are skipped.
                # NOTE(review): presumably the RepeatMasker header - confirm
                # that no data row is dropped for your input files.
                if x > 2:
                    _record_repeat(fout, _clean_repname(line[9]), line[4],
                                   int(line[5]), int(line[6]))
                x += 1
        finally:
            fin.close()
    else:
        # Tab-separated BED: chromosome, start, end, name in columns 0-3.
        with open(os.path.realpath(annotation_file), 'r') as fin:
            for line in fin:
                line = line.rstrip('\r\n')
                if not line.strip():
                    # Blank lines carry no record.
                    continue
                line = line.split('\t')
                _record_repeat(fout, _clean_repname(line[3]), line[0],
                               int(line[1]), int(line[2]))
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Sort the repeat names and write the key file: one "<name>\t<index>" line per
# repeat, where <index> is the zero-based rank of the name in sorted order.
repeat_elements = sorted(repeat_elements)
print("Writing a key for all repeats...")
key_path = os.path.realpath(setup_folder + os.path.sep + 'repgenomes_key.txt')
# The context manager closes the file even if a write fails.
with open(key_path, 'w') as fout:
    for x, repeat in enumerate(repeat_elements):
        fout.write(str(repeat) + '\t' + str(x) + '\n')
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
################################################################################
# Generate the spacer for pseudogenomes: a run of `gapl` "N" characters.
# String repetition yields the same result as appending one "N" per iteration
# (including the empty string when gapl <= 0) without the repeated copying.
spacer = "N" * gapl
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
152 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Save the number of fragments recorded for each repeat name, one
# "<count>\t<repname>" line per repeat.
print("Saving number of fragments processed per repname to " + nfragmentsfile1)
# The context manager closes the file even if a write fails.
with open(os.path.realpath(nfragmentsfile1), "w") as fout1:
    for repname in rep_chr:
        fout1.write(str(len(rep_chr[repname])) + "\t" + repname + '\n')
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
161 |
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
# Generate one metagenome per repeat name, save it to a FASTA file in
# setup_folder and build a bowtie index for it.
nrepgenomes = len(rep_chr)
for k, repname in enumerate(rep_chr, 1):
    # Replace characters that are awkward in file names.
    newname = repname.replace("(", "_").replace(")", "_").replace("/", "_")
    progress = newname + ".fa" + " (" + str(k) + " of " + str(nrepgenomes) + ")"
    print("processing repgenome " + progress)
    rep_chr_current = rep_chr[repname]
    rep_start_current = rep_start[repname]
    rep_end_current = rep_end[repname]
    print("-------> " + str(len(rep_chr_current)) + " fragments")

    # Collect spacer + flanked fragment pieces and join them once at the end;
    # repeated "metagenome = metagenome + ..." copies the whole string for
    # every fragment, which is quadratic for repeats with many fragments.
    pieces = []
    for chrom, fragstart, fragend in zip(rep_chr_current,
                                         rep_start_current,
                                         rep_end_current):
        try:
            chrom_length = lgenome[chrom]
            chrom_seq = genome[chrom]
        except KeyError:
            # Fragment lies on a sequence that is not in the loaded genome:
            # report it and leave it out of the metagenome.
            print("Unrecognised Chromosome: " + chrom)
            continue
        # Extend the fragment by flankingl on both sides, clamped to the
        # chromosome boundaries.
        rstart = max(fragstart - flankingl, 0)
        rend = min(fragend + flankingl, chrom_length - 1)
        pieces.append(spacer)
        # str() is a no-op for plain strings; it keeps the join valid if the
        # genome values turn out to be Seq-like objects (not visible here).
        pieces.append(str(chrom_seq[rstart:rend + 1]))
    metagenome = "".join(pieces)

    # Convert metagenome to SeqRecord object (required by SeqIO.write).
    # NOTE(review): the record id is the literal string "repname", so every
    # generated FASTA gets the same header -- confirm this is intended before
    # switching it to the actual repeat name.
    record = SeqRecord(Seq(metagenome, IUPAC.unambiguous_dna),
                       id="repname", name="", description="")
    print("saving repgenome " + progress)
    fastafilename = os.path.realpath(setup_folder + os.path.sep + newname + ".fa")
    SeqIO.write(record, fastafilename, "fasta")
    print("indexing repgenome " + progress)
    # Pass the arguments as a list so paths containing spaces or quote
    # characters reach bowtie-build intact.
    command = ['bowtie-build', '-f', fastafilename,
               setup_folder + os.path.sep + newname]
    returncode = subprocess.call(command)
    if returncode != 0:
        # Keep processing the remaining repeats, but do not hide the failure.
        sys.stderr.write("Warning: bowtie-build exited with status "
                         + str(returncode) + " for " + fastafilename + "\n")

print("... Done")
041de602103e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit ef0b32c10d178e61faf9042bc5e0d3cc66a10729
drosofff
parents:
diff
changeset
|
194 |