Mercurial > repos > greg > ideas
comparison ideas.xml @ 169:7b0c6c6cb82b draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Jan 2018 09:30:44 -0500 |
parents | 5c5e2f7b34c8 |
children | 445f67ea18f6 |
comparison legend: equal | deleted | inserted | replaced
168:5c5e2f7b34c8 | 169:7b0c6c6cb82b |
---|---|
11 </requirements> | 11 </requirements> |
12 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
13 #set output_pdf_dir = "output_pdf_dir" | 13 #set output_pdf_dir = "output_pdf_dir" |
14 #set output_txt_dir = "output_txt_dir" | 14 #set output_txt_dir = "output_txt_dir" |
15 #set output_training_dir = "output_training_dir" | 15 #set output_training_dir = "output_training_dir" |
16 #set tmp_dir = "tmp" | |
17 #set prep_input_config = "prep_input_config.txt" | |
18 #set prep_output_config = "prep_output_config.txt" | |
19 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window | |
20 #set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt" | 16 #set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt" |
21 #set perform_training = $perform_training_cond.perform_training | 17 #set perform_training = $perform_training_cond.perform_training |
22 ############################################## | |
23 ## Create the config file and prepare the data | |
24 ############################################## | |
25 #if str($output_heatmaps) == "yes": | |
26 mkdir '$output_pdf_dir' && | |
27 #end if | |
28 #if str($perform_training) == "yes": | |
29 #set output_dir = $output_training_dir | |
30 mkdir '$output_training_dir' && | |
31 #else: | |
32 #set output_dir = $output_txt_dir | |
33 mkdir '$output_txt_dir' && | |
34 #end if | |
35 cp '$gen_prep_input_config' $prep_input_config && | |
36 sort $prep_input_config -o $prep_input_config && | |
37 prepMat | |
38 $prep_input_config | |
39 #if str($specify_genomic_window) == "yes": | |
40 -bed '$specify_genomic_window_cond.bed_input' | |
41 #else: | |
42 -gsz '$chromInfo' | |
43 -wsz $specify_genomic_window_cond.window_size | |
44 #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes | |
45 #if str($restrict_chromosomes) == "yes": | |
46 #set chroms = [] | |
47 #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat | |
48 #for $i in $chrom_repeat.chrom | |
49 $chroms.append($i) | |
50 #end for | |
51 -chr ",".join(chroms) | |
52 #end if | |
53 #end if | |
54 $bychr | |
55 -c $reads_per_bp | |
56 #if str($blacklist_input) not in ["None", ""]: | |
57 -exclude '$blacklist_input' | |
58 #end if | |
59 $norm | |
60 &>prepmat_log.txt; | |
61 if [[ $? -ne 0 ]]; then | |
62 cp prepmat_log.txt '$output_dir'; | |
63 exit 1; | |
64 fi | |
65 ############################################## | |
66 ## Coerce the prepMat config output to the | |
67 ## format expected by IDEAS. | |
68 ############################################## | |
69 && cut -d' ' $prep_input_config -f1,2 > file1.txt | |
70 && ls tmp/*.bed.gz > file2.txt | |
71 && paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config | |
72 #if str($specify_genomic_window) == "yes": | |
73 ############################################## | |
74 ## Using a genomic window bed file, so categorize | |
75 ## the window positions by chromosome to enable | |
76 ## the IDEAS -inv option. | |
77 ############################################## | |
78 && cp '$gen_windows_positions_by_chroms_config' $windows_positions_by_chroms_config | |
79 #end if | |
80 && Rscript '$__tool_directory__/ideas.R' | 18 && Rscript '$__tool_directory__/ideas.R' |
81 #if str($specify_genomic_window) == "yes": | 19 --burnin_num $burnin_num |
82 --windows_bed '$specify_genomic_window_cond.bed_input' | 20 #if str($bychr) == "true": |
83 --windows_config $windows_positions_by_chroms_config | 21 --bychr true |
84 #end if | 22 #end if |
85 #if str($perform_training) == "yes": | 23 --chrom_bed_input $input.metadata.chrom_bed_input |
86 --training_iterations $perform_training_cond.training_iterations | 24 --chromosome_windows $input.metadata.chromosome_windows |
87 --training_windows $perform_training_cond.training_windows | 25 #if str($hp) == "true": |
88 #end if | 26 --hp true |
89 --prep_output_config '$prep_output_config' | 27 #end if |
90 --hp $hp | 28 #if str($initial_states) != "0": |
29 --initial_states $initial_states | |
30 #end if | |
31 --input $input | |
32 --input_files_path $input.extra_files_path | |
33 --ideas_input_config $input.metadata.ideas_input_config | |
91 #if str($log2) != "0.0": | 34 #if str($log2) != "0.0": |
92 --log2 $log2 | 35 --log2 $log2 |
93 #end if | 36 #end if |
37 #if str($maxerr) != "0.0": | |
38 --maxerr $maxerr | |
39 #end if | |
40 #if str($max_cell_type_clusters) != "0": | |
41 --max_cell_type_clusters $max_cell_type_clusters | |
42 #end if | |
43 #if str($max_position_classes) != "0": | |
44 --max_position_classes $max_position_classes | |
45 #end if | |
94 #if str($max_states) != "0.0": | 46 #if str($max_states) != "0.0": |
95 --max_states $max_states | 47 --max_states $max_states |
96 #end if | 48 #end if |
97 #if str($initial_states) != "0": | |
98 --initial_states $initial_states | |
99 #end if | |
100 #if str($max_position_classes) != "0": | |
101 --max_position_classes $max_position_classes | |
102 #end if | |
103 #if str($max_cell_type_clusters) != "0": | |
104 --max_cell_type_clusters $max_cell_type_clusters | |
105 #end if | |
106 #if str($prior_concentration) != "0.0": | |
107 --prior_concentration $prior_concentration | |
108 #end if | |
109 --burnin_num $burnin_num | |
110 --mcmc_num $mcmc_num | 49 --mcmc_num $mcmc_num |
111 #if str($minerr) != "0.0": | 50 #if str($minerr) != "0.0": |
112 --minerr $minerr | 51 --minerr $minerr |
113 #end if | 52 #end if |
114 #if str($maxerr) != "0.0": | 53 #if str($prior_concentration) != "0.0": |
115 --maxerr $maxerr | 54 --prior_concentration $prior_concentration |
116 #end if | 55 #end if |
117 --rseed $rseed | |
118 --thread \${GALAXY_SLOTS:-4} | |
119 --project_name '$project_name' | 56 --project_name '$project_name' |
120 #if str($save_ideas_log) == "yes": | 57 #if str($save_ideas_log) == "yes": |
121 --save_ideas_log $save_ideas_log | 58 --save_ideas_log $save_ideas_log |
122 --output_log '$output_log' | 59 --output_log '$output_log' |
60 #end if | |
61 #if str($standardize_datasets) == "true": | |
62 --standardize_datasets true | |
63 #end if | |
64 --rseed $rseed | |
65 --thread \${GALAXY_SLOTS:-4} | |
66 #if str($perform_training) == "yes": | |
67 --training_iterations $perform_training_cond.training_iterations | |
68 --training_windows $perform_training_cond.training_windows | |
123 #end if | 69 #end if |
124 #if str($perform_training) == "yes": | 70 #if str($perform_training) == "yes": |
125 && mv ./*.para0 '$output_dir' | 71 && mv ./*.para0 '$output_dir' |
126 && mv ./*.profile0 '$output_dir' | 72 && mv ./*.profile0 '$output_dir' |
127 #else: | 73 #else: |
138 #if str($perform_training) == "yes": | 84 #if str($perform_training) == "yes": |
139 --in_training_mode true | 85 --in_training_mode true |
140 #end if | 86 #end if |
141 #end if | 87 #end if |
142 ]]></command> | 88 ]]></command> |
143 <configfiles> | |
144 <configfile name="gen_prep_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract": | |
145 #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions | |
146 #for $i in $cell_type_epigenetic_factor_cond.input: | |
147 #set file_name_with_ext = $i.name | |
148 #if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0: | |
149 #set file_name_with_ext = $file_name_with_ext.split('/')[-1] | |
150 #end if | |
151 #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext | |
152 #set file_name = $file_name_with_ext.split(".")[0] | |
153 #if str($input_name_positions) == "cell_first": | |
154 #set cell_type_name = $file_name.split("-")[0] | |
155 #set epigenetic_factor_name = $file_name.split("-")[1] | |
156 #else: | |
157 #set cell_type_name = $file_name.split("-")[1] | |
158 #set epigenetic_factor_name = $file_name.split("-")[0] | |
159 #end if | |
160 ${cell_type_name} ${epigenetic_factor_name} ${i} | |
161 #end for | |
162 #else: | |
163 #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat: | |
164 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} | |
165 #end for | |
166 #end if]]></configfile> | |
167 <configfile name="gen_windows_positions_by_chroms_config"><![CDATA[#if str($specify_genomic_window_cond.specify_genomic_window) == "yes": | |
168 #import collections | |
169 #set window_positions_by_chroms_odict = $collections.OrderedDict() | |
170 #for count, line in enumerate(open($specify_genomic_window_cond.bed_input.file_name, 'r')): | |
171 #set $line = $line.strip() | |
172 #if not $line or $line.startswith('#'): | |
173 #continue | |
174 #end if | |
175 #set items = $line.split('\t') | |
176 #if $items[0] in $window_positions_by_chroms_odict: | |
177 #set tup = $window_positions_by_chroms_odict[$items[0]] | |
178 #set $tup[1] += 1 | |
179 #set $window_positions_by_chroms_odict[$items[0]] = $tup | |
180 #else: | |
181 #set $window_positions_by_chroms_odict[$items[0]] = [$count, $count+1] | |
182 #end if | |
183 #end for | |
184 #for $chrom, $tup in $window_positions_by_chroms_odict.items(): | |
185 ${chrom} ${tup[0]} ${tup[1]} | |
186 #end for | |
187 #end if]]></configfile> | |
188 </configfiles> | |
189 <inputs> | 89 <inputs> |
190 <conditional name="perform_training_cond"> | 90 <conditional name="perform_training_cond"> |
191 <param name="perform_training" type="select" label="Perform training?"> | 91 <param name="perform_training" type="select" label="Perform training?"> |
192 <option value="yes" selected="true">Yes</option> | 92 <option value="yes" selected="true">Yes</option> |
193 <option value="no">No</option> | 93 <option value="no">No</option> |
196 <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/> | 96 <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/> |
197 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/> | 97 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/> |
198 </when> | 98 </when> |
199 <when value="no"/> | 99 <when value="no"/> |
200 </conditional> | 100 </conditional> |
201 <conditional name="cell_type_epigenetic_factor_cond"> | 101 <param name="input" type="data" format="ideaspre" label="Select IDEAS input"> |
202 <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by"> | |
203 <option value="extract" selected="true">extracting them from the selected input file names</option> | |
204 <option value="manual">manually setting them for each selected input</option> | |
205 </param> | |
206 <when value="extract"> | |
207 <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files"> | |
208 <validator type="empty_field"/> | |
209 <validator type="unspecified_build"/> | |
210 </param> | |
211 <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names"> | |
212 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option> | |
213 <option value="cell_last">Epigenetic factor name - Cell type name</option> | |
214 </param> | |
215 </when> | |
216 <when value="manual"> | |
217 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1"> | |
218 <param name="cell_type_name" type="text" value="" label="Cell type name"> | |
219 <validator type="empty_field"/> | |
220 </param> | |
221 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name"> | |
222 <validator type="empty_field"/> | |
223 </param> | |
224 <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file"> | |
225 <validator type="empty_field"/> | |
226 <validator type="unspecified_build"/> | |
227 </param> | |
228 </repeat> | |
229 </when> | |
230 </conditional> | |
231 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name"> | 102 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name"> |
232 <validator type="empty_field"/> | 103 <validator type="empty_field"/> |
233 </param> | 104 </param> |
234 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/> | 105 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/> |
235 <conditional name="specify_genomic_window_cond"> | |
236 <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data"> | |
237 <option value="no" selected="true">No</option> | |
238 <option value="yes">Yes</option> | |
239 </param> | |
240 <when value="no"> | |
241 <param name="window_size" type="integer" value="200" label="Window size in base pairs"/> | |
242 <conditional name="restrict_chromosomes_cond"> | |
243 <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes"> | |
244 <option value="no" selected="true">No</option> | |
245 <option value="yes">Yes</option> | |
246 </param> | |
247 <when value="no"/> | |
248 <when value="yes"> | |
249 <repeat name="chrom_repeat" title="Chromosomes" min="1"> | |
250 <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/> | |
251 </repeat> | |
252 </when> | |
253 </conditional> | |
254 </when> | |
255 <when value="yes"> | |
256 <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/> | |
257 </when> | |
258 </conditional> | |
259 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/> | 106 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/> |
260 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using"> | 107 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using"> |
261 <option value="6" selected="true">mean</option> | 108 <option value="6" selected="true">mean</option> |
262 <option value="8">max</option> | 109 <option value="8">max</option> |
263 </param> | 110 </param> |
264 <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/> | |
265 <param name="norm" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/> | |
266 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/> | 111 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/> |
267 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/> | 112 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/> |
268 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/> | 113 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/> |
269 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/> | 114 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/> |
270 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/> | 115 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/> |
271 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/> | 116 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/> |
272 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/> | 117 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/> |
118 <param name="standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/> | |
273 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/> | 119 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/> |
274 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/> | 120 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/> |
275 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/> | 121 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/> |
276 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/> | 122 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/> |
277 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?"> | 123 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?"> |
301 </collection> | 147 </collection> |
302 </outputs> | 148 </outputs> |
303 <tests> | 149 <tests> |
304 <test> | 150 <test> |
305 <param name="perform_training" value="no"/> | 151 <param name="perform_training" value="no"/> |
306 <param name="cell_type_epigenetic_factor" value="extract"/> | 152 <param name="input" value="input.ideaspre" ftype="ideaspre" dbkey="hg19"/> |
307 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/> | |
308 <param name="input_name_positions" value="cell_first"/> | |
309 <param name="specify_genomic_window" value="yes"/> | |
310 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/> | |
311 <param name="project_name" value="IDEAS_out"/> | 153 <param name="project_name" value="IDEAS_out"/> |
312 <param name="initial_states" value="2"/> | 154 <param name="initial_states" value="2"/> |
313 <param name="maxerr" value="1000"/> | 155 <param name="maxerr" value="1000"/> |
314 <param name="output_heatmaps" value="no"/> | 156 <param name="output_heatmaps" value="no"/> |
315 <output_collection name="output_txt_collection" type="list"> | 157 <output_collection name="output_txt_collection" type="list"> |
317 <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/> | 159 <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/> |
318 <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/> | 160 <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/> |
319 <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/> | 161 <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/> |
320 </output_collection> | 162 </output_collection> |
321 <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" /> | 163 <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" /> |
322 </test> | |
323 <test> | |
324 <param name="perform_training" value="no"/> | |
325 <param name="cell_type_epigenetic_factor" value="manual"/> | |
326 <repeat name="input_repeat"> | |
327 <param name="cell_type_name" value="e001" /> | |
328 <param name="epigenetic_factor_name" value="h3k4me3"/> | |
329 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/> | |
330 </repeat> | |
331 <param name="specify_genomic_window" value="yes"/> | |
332 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/> | |
333 <param name="project_name" value="IDEAS_out"/> | |
334 <param name="initial_states" value="2"/> | |
335 <param name="maxerr" value="1000"/> | |
336 <param name="output_heatmaps" value="no"/> | |
337 <output_collection name="output_txt_collection" type="list"> | |
338 <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.cluster" ftype="txt"/> | |
339 <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/> | |
340 <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/> | |
341 <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/> | |
342 </output_collection> | |
343 </test> | 164 </test> |
344 </tests> | 165 </tests> |
345 <help> | 166 <help> |
346 **What it does** | 167 **What it does** |
347 | 168 |