Mercurial > repos > jjohnson > defuse8
comparison datamanager_create_reference.xml @ 0:63f23d5db27c draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 2c2fd38cb761ec57bac7a0bd376e6aa2b88265d0-dirty
author | jjohnson |
---|---|
date | Mon, 20 May 2019 15:25:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:63f23d5db27c |
---|---|
1 <tool id="data_manager_defuse_reference" name="DeFuse Reference DataManager" version="@DEFUSE_VERSION@.1" tool_type="manage_data"> | |
2 <description>create a defuse reference from Ensembl and UCSC sources</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <requirements> | |
7 <expand macro="defuse_requirement" /> | |
8 </requirements> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 python '$__tool_directory__/datamanager_create_reference.py' | |
11 --dbkey $genome.ensembl_genome_version | |
12 --description "$genome.ensembl_prefix $genome.ensembl_genome_version ($genome.ucsc_genome_version)" | |
13 --defuse_config $defuse_config | |
14 --defuse_script $defuse_script | |
15 $out_file | |
16 ]]></command> | |
17 <configfiles> | |
18 <configfile name="defuse_config"> | |
19 # | |
20 # Configuration file for defuse | |
21 # | |
22 # Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__ | |
23 # will be set by the runtime script using the ENV PATH | |
24 # | |
25 | |
26 # Directory where the defuse code was unpacked | |
27 source_directory = __DEFUSE_PATH__ | |
28 | |
29 # Organism IDs | |
30 ensembl_organism = $genome.ensembl_organism | |
31 ensembl_prefix = $genome.ensembl_prefix | |
32 ensembl_version = $genome.ensembl_version | |
33 ensembl_genome_version = $genome.ensembl_genome_version | |
34 ucsc_genome_version = $genome.ucsc_genome_version | |
35 ncbi_organism = $genome.ncbi_organism | |
36 ncbi_prefix = $genome.ncbi_prefix | |
37 | |
38 # Directory where you want your dataset | |
39 dataset_directory = __DATASET_DIRECTORY__ | |
40 | |
41 #raw | |
42 # Input genome and gene models | |
43 gene_models = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).gtf | |
44 genome_fasta = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa | |
45 | |
46 # Repeat table from ucsc genome browser | |
47 repeats_filename = $(dataset_directory)/repeats.txt | |
48 | |
49 # EST info downloaded from ucsc genome browser | |
50 est_fasta = $(dataset_directory)/est.fa | |
51 est_alignments = $(dataset_directory)/intronEst.txt | |
52 | |
53 # Unigene clusters downloaded from ncbi | |
54 unigene_fasta = $(dataset_directory)/$(ncbi_prefix).seq.uniq | |
55 #end raw | |
56 | |
57 # Paths to external tools | |
58 samtools_bin = __SAMTOOLS_BIN__ | |
59 bowtie_bin = __BOWTIE_BIN__ | |
60 bowtie_build_bin = __BOWTIE_BUILD_BIN__ | |
61 blat_bin = __BLAT_BIN__ | |
62 fatotwobit_bin = __FATOTWOBIT_BIN__ | |
63 gmap_bin = __GMAP_BIN__ | |
64 gmap_setup_bin = __GMAP_SETUP_BIN__ | |
65 gmap_build_bin = __GMAP_BUILD_BIN__ | |
66 r_bin = __R_BIN__ | |
67 rscript_bin = __RSCRIPT_BIN__ | |
68 | |
69 #raw | |
70 # Directory where the gmap index will be built | |
71 gmap_index_directory = $(dataset_directory)/gmap | |
72 #end raw | |
73 | |
74 #raw | |
75 # Dataset files | |
76 dataset_prefix = $(dataset_directory)/defuse | |
77 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
78 exons_fasta = $(dataset_prefix).exons.fa | |
79 cds_fasta = $(dataset_prefix).cds.fa | |
80 cdna_regions = $(dataset_prefix).cdna.regions | |
81 cdna_fasta = $(dataset_prefix).cdna.fa | |
82 reference_fasta = $(dataset_prefix).reference.fa | |
83 rrna_fasta = $(dataset_prefix).rrna.fa | |
84 ig_gene_list = $(dataset_prefix).ig.gene.list | |
85 repeats_regions = $(dataset_directory)/repeats.regions | |
86 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
87 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
88 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
89 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
90 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
91 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
92 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
93 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
94 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
95 | |
96 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
97 prefilter1 = $(unigene_fasta) | |
98 | |
99 # deFuse scripts and tools | |
100 scripts_directory = $(source_directory)/scripts | |
101 tools_directory = $(source_directory)/tools | |
102 data_directory = $(source_directory)/data | |
103 #end raw | |
104 | |
105 # Parameters for building the dataset | |
106 chromosomes = $genome.chromosomes | |
107 mt_chromosome = $genome.mt_chromosome | |
108 gene_sources = $genome.gene_sources | |
109 ig_gene_sources = $genome.ig_gene_sources | |
110 rrna_gene_sources = $genome.rrna_gene_sources | |
111 gene_biotypes = $genome.gene_sources | |
112 ig_gene_biotypes = $genome.ig_gene_sources | |
113 rrna_gene_biotypes = $genome.rrna_gene_sources | |
114 | |
115 #raw | |
116 # Remove temp files | |
117 remove_job_files = yes | |
118 remove_job_temp_files = yes | |
119 #end raw | |
120 </configfile> | |
121 <configfile name="defuse_script">#slurp | |
122 #!/bin/bash | |
123 ## define some things for Cheetah processing | |
124 #set $amp = chr(38) | |
125 #set $gt = chr(62) | |
126 ## substitute pathnames into config file | |
127 if `grep __DATASET_DIRECTORY__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DATASET_DIRECTORY__#\$1#" $defuse_config; fi | |
128 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi | |
129 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi | |
130 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi | |
131 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi | |
132 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi | |
133 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi | |
134 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi | |
135 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi | |
136 if `grep __GMAP_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BUILD_BIN=`which gmap_build`;then sed -i'.tmp' "s#__GMAP_BUILD_BIN__#\${GMAP_BUILD_BIN}#" $defuse_config; fi | |
137 if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi | |
138 ## copy config to output | |
139 cp $defuse_config \$1/defuse_config.txt | |
140 ## Run the create_reference_dataset.pl | |
141 perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config | |
142 </configfile> | |
143 </configfiles> | |
144 <inputs> | |
145 <conditional name="genome"> | |
146 <param name="choice" type="select" label="Select a Genome Build"> | |
147 <option value="GRCh38">Homo_sapiens GRCh38 hg38</option> | |
148 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> | |
149 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> | |
150 <option value="GRCm38">Mus_musculus GRCm38 mm10</option> | |
151 <option value="NCBIM37">Mus_musculus NCBIM37 mm9</option> | |
152 <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option> | |
153 <option value="user_specified">User specified</option> | |
154 </param> | |
155 <when value="GRCh38"> | |
156 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/> | |
157 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/> | |
158 <param name="ensembl_genome_version" type="hidden" value="GRCh38"/> | |
159 <param name="ensembl_version" type="hidden" value="80"/> | |
160 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/> | |
161 <param name="ncbi_prefix" type="hidden" value="Hs"/> | |
162 <param name="ucsc_genome_version" type="hidden" value="hg38"/> | |
163 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/> | |
164 <param name="mt_chromosome" type="hidden" value="MT"/> | |
165 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
166 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
167 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
168 </when> | |
169 <when value="GRCh37"> | |
170 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/> | |
171 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/> | |
172 <param name="ensembl_genome_version" type="hidden" value="GRCh37"/> | |
173 <param name="ensembl_version" type="hidden" value="71"/> | |
174 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/> | |
175 <param name="ncbi_prefix" type="hidden" value="Hs"/> | |
176 <param name="ucsc_genome_version" type="hidden" value="hg19"/> | |
177 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/> | |
178 <param name="mt_chromosome" type="hidden" value="MT"/> | |
179 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
180 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
181 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
182 </when> | |
183 <when value="NCBI36"> | |
184 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/> | |
185 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/> | |
186 <param name="ensembl_genome_version" type="hidden" value="NCBI36"/> | |
187 <param name="ensembl_version" type="hidden" value="54"/> | |
188 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/> | |
189 <param name="ncbi_prefix" type="hidden" value="Hs"/> | |
190 <param name="ucsc_genome_version" type="hidden" value="hg18"/> | |
191 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/> | |
192 <param name="mt_chromosome" type="hidden" value="MT"/> | |
193 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
194 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
195 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
196 </when> | |
197 <when value="GRCm38"> | |
198 <param name="ensembl_organism" type="hidden" value="mus_musculus"/> | |
199 <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/> | |
200 <param name="ensembl_genome_version" type="hidden" value="GRCm38"/> | |
201 <param name="ensembl_version" type="hidden" value="71"/> | |
202 <param name="ncbi_organism" type="hidden" value="Mus_musculus"/> | |
203 <param name="ncbi_prefix" type="hidden" value="Mm"/> | |
204 <param name="ucsc_genome_version" type="hidden" value="mm10"/> | |
205 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/> | |
206 <param name="mt_chromosome" type="hidden" value="MT"/> | |
207 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
208 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
209 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
210 </when> | |
211 <when value="NCBIM37"> | |
212 <param name="ensembl_organism" type="hidden" value="mus_musculus"/> | |
213 <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/> | |
214 <param name="ensembl_genome_version" type="hidden" value="NCBIM37"/> | |
215 <param name="ensembl_version" type="hidden" value="67"/> | |
216 <param name="ncbi_organism" type="hidden" value="Mus_musculus"/> | |
217 <param name="ncbi_prefix" type="hidden" value="Mm"/> | |
218 <param name="ucsc_genome_version" type="hidden" value="mm9"/> | |
219 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/> | |
220 <param name="mt_chromosome" type="hidden" value="MT"/> | |
221 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
222 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
223 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
224 </when> | |
225 <when value="Rnor_5.0"> | |
226 <param name="ensembl_organism" type="hidden" value="rattus_norvegicus"/> | |
227 <param name="ensembl_prefix" type="hidden" value="Rattus_norvegicus"/> | |
228 <param name="ensembl_genome_version" type="hidden" value="Rnor_5.0"/> | |
229 <param name="ensembl_version" type="hidden" value="71"/> | |
230 <param name="ncbi_organism" type="hidden" value="Rattus_norvegicus"/> | |
231 <param name="ncbi_prefix" type="hidden" value="Rn"/> | |
232 <param name="ucsc_genome_version" type="hidden" value="rn5"/> | |
233 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT"/> | |
234 <param name="mt_chromosome" type="hidden" value="MT"/> | |
235 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/> | |
236 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/> | |
237 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/> | |
238 </when> | |
239 <when value="user_specified"> | |
240 <param name="ensembl_organism" type="text" value="" label="Ensembl Organism Name" help="Examples: homo_sapiens, mus_musculus, rattus_norvegicus"/> | |
241 <param name="ensembl_prefix" type="text" value="" label="Ensembl Organism prefix" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/> | |
242 <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Examples: GRCh38, GRCh37, GRCm38, Rnor_5.0"/> | |
243 <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 86"/> | |
244 <param name="ncbi_organism" type="text" value="" label="NCBI Organism Name" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/> | |
245 <param name="ncbi_prefix" type="text" value="" label="NCBI Organism Unigene prefix" help="Examples: Hs, Mm, Rn"/> | |
246 <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Examples: hg38, hg19, mm10, rn5"/> | |
247 <param name="chromosomes" type="text" value="" label="Chromosomes for Ensembl genome build" > | |
248 <help> Examples: | |
249 Homo_sapiens: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
250 Mus_musculus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT | |
251 Rattus_norvegicus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT | |
252 ( ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/dna/ ) | |
253 </help> | |
254 </param> | |
255 <param name="mt_chromosome" type="text" value="MT" label="Ensembl Mitochonrial Chromosome name" /> | |
256 <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" /> | |
257 <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" /> | |
258 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> | |
259 </when> | |
260 </conditional> | |
261 </inputs> | |
262 <outputs> | |
263 <data name="out_file" format="data_manager_json" label="${tool.name} : ${genome.ensembl_genome_version}"/> | |
264 </outputs> | |
265 <tests> | |
266 </tests> | |
267 <help> | |
268 **DeFuse** | |
269 | |
270 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6_ manual for details. | |
271 | |
272 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: | |
273 - genome_fasta from Ensembl | |
274 - gene_models from Ensembl | |
275 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
276 - est_fasta from UCSC | |
277 - est_alignments from UCSC intronEst.txt | |
278 - unigene_fasta from NCBI | |
279 | |
280 The create_defuse_reference Galaxy tool downloads the reference genome and other source files, and builds any derivative files including bowtie indices, gmap indices, and 2bit files. Expect this step to take at least 12 hours. | |
281 | |
282 | |
283 It will generate the reference data for the deFuse Galaxy tool. | |
284 | |
285 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
286 | |
287 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
288 | |
289 .. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1 | |
290 | |
291 ------ | |
292 | |
293 **Outputs** | |
294 | |
295 The galaxy history will contain: the config.txt file that provides DeFuse with the reference data paths. | |
296 | |
297 </help> | |
298 </tool> |