comparison ideas.xml @ 33:695053a23fe4 draft

Uploaded
author greg
date Tue, 22 Aug 2017 09:54:49 -0400
parents 58f5b2af9473
children 8d8f796a3bda
comparison
equal deleted inserted replaced
32:58f5b2af9473 33:695053a23fe4
67 ##&& Rscript '$__tool_directory__/build_matrix.R' 67 ##&& Rscript '$__tool_directory__/build_matrix.R'
68 ##-i $tmp_dir/*.bed.gz 68 ##-i $tmp_dir/*.bed.gz
69 ##-o $ideas_matrix_input_file 69 ##-o $ideas_matrix_input_file
70 ##-w $ideas_input_dir 70 ##-w $ideas_input_dir
71 ############################################## 71 ##############################################
72 ## Run IDEAS on the R matrix 72 ## Run IDEAS
73 ############################################## 73 ##############################################
74 && ideas 74 && ideas
75 '$prep_output_config' 75 '$prep_output_config'
76 $tmp_dir/*.bed 76 $tmp_dir/*.bed
77 #set smoother_annotation = $smoother_annotation_cond.smoother_annotation 77 #set smoother_annotation = $smoother_annotation_cond.smoother_annotation
84 #end if 84 #end if
85 #set in_windows = $in_windows_cond.in_windows 85 #set in_windows = $in_windows_cond.in_windows
86 #if str($in_windows) == 'yes': 86 #if str($in_windows) == 'yes':
87 -inv $window_start $window_end 87 -inv $window_start $window_end
88 #end if 88 #end if
89 #set log2_transformation = $log2_transformation_cond.log2_transformation 89 #if str($log2_num):
90 #if str($log2_transformation) == 'yes': 90 -log2 $log2_num
91 #set log2_num = $log2_transformation.log2_num 91 #end if
92 -log2 92 #if str($max_states):
93 #if str($log2_num) != '0': 93 -G $max_states
94 $log2_num 94 #end if
95 #end if 95 #if str($initial_states):
96 #end if 96 -C $initial_states
97 #set max_states_inferred = $max_states_inferred_cond.max_states_inferred 97 #end if
98 #if str($max_states_inferred) == 'yes': 98 #if str($max_position_classes):
99 -G $max_states_inferred_cond.max_states
100 #end if
101 #set num_initial_states = $num_initial_states_cond.num_initial_states
102 #if str($num_initial_states) == 'yes':
103 -C $num_initial_states_cond.initial_states
104 #end if
105 #if str($max_position_classes) != '0':
106 -P $max_position_classes 99 -P $max_position_classes
107 #end if 100 #end if
108 #if str($max_cell_type_clusters) != '0': 101 #if str($max_cell_type_clusters):
109 -K $max_cell_type_clusters 102 -K $max_cell_type_clusters
110 #end if 103 #end if
111 #if str($prior_concentration) != '0': 104 #if str($prior_concentration):
112 -A $prior_concentration 105 -A $prior_concentration
113 #end if 106 #end if
114 #set burnin_max_steps = $burnin_max_steps_cond.burnin_max_steps 107 -sample $burnin_num $mcmc_num
115 #if str($burnin_max_steps) == 'yes': 108 -minerr $min_standard_dev
116 -sample $burnin_max_steps_cond.burnin_num $burnin_max_steps_cond.mcmc_num 109 -maxerr $max_standard_dev
117 #end if
118 #set set_min_standard_dev = $set_min_standard_dev_cond.set_min_standard_dev
119 #if str($set_min_standard_dev) == 'yes':
120 -minerr $set_min_standard_dev_cond.min_standard_dev
121 #end if
122 #set set_max_standard_dev = $set_max_standard_dev_cond.set_max_standard_dev
123 #if str($set_max_standard_dev) == 'yes':
124 -maxerr $set_max_standard_dev_cond.max_standard_dev
125 #end if
126 -thread \${GALAXY_SLOTS:-4} 110 -thread \${GALAXY_SLOTS:-4}
127 > $output_log 111 > $output_log
128 && mv *.cluster $output_cluster 112 && if [ -f *.cluster ] ; then mv *.cluster $output_cluster
129 && mv *.para $output_para 113 && if [ -f *.para ] ; then mv *.para $output_para
130 && mv *.profile $output_profile 114 && if [ -f *.profile ] ; then mv *.profile $output_profile
131 && mv *.state $output_state 115 && if [ -f *.state ] ; then mv *.state $output_state
132 ]]></command> 116 ]]></command>
133 <configfiles> 117 <configfiles>
134 <configfile name="prep_input_config"><![CDATA[#for $input_items in $input_type_cond.input_repeat: 118 <configfile name="prep_input_config"><![CDATA[#for $input_items in $input_type_cond.input_repeat:
135 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} 119 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
136 #end for ]]></configfile> 120 #end for ]]></configfile>
188 </param> 172 </param>
189 <when value="no"/> 173 <when value="no"/>
190 <when value="yes"/> 174 <when value="yes"/>
191 </conditional> 175 </conditional>
192 <param name="reads_per_bp" type="integer" value="1" min="1" max="8" label="Number of reads per base pair for calculating the average signal in each genomic window"/> 176 <param name="reads_per_bp" type="integer" value="1" min="1" max="8" label="Number of reads per base pair for calculating the average signal in each genomic window"/>
193 <conditional name="blacklist_regions_cond"> 177 <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
194 <param name="blacklist_regions" type="select" label="Select Bed file containing blacklist regions for exclusion">
195 <option value="no" selected="true">No</option>
196 <option value="yes">Yes</option>
197 </param>
198 <when value="no"/>
199 <when value="yes">
200 <param name="blacklist_input" type="data" format="bed" label="Bed file containing regions to exclude"/>
201 </when>
202 </conditional>
203 <conditional name="standardize_datasets_cond"> 178 <conditional name="standardize_datasets_cond">
204 <param name="standardize_datasets" type="select" display="radio" label="Standardize all datasets"> 179 <param name="standardize_datasets" type="select" display="radio" label="Standardize all datasets">
205 <option value="no" selected="true">No</option> 180 <option value="no" selected="true">No</option>
206 <option value="yes">Yes</option> 181 <option value="yes">Yes</option>
207 </param> 182 </param>
214 <option value="yes">Yes</option> 189 <option value="yes">Yes</option>
215 </param> 190 </param>
216 <when value="no"/> 191 <when value="no"/>
217 <when value="yes"/> 192 <when value="yes"/>
218 </conditional> 193 </conditional>
219 <conditional name="in_windows_cond"> 194 <param name="log2_num" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero value has no affect"/>
220 <param name="in_windows" type="select" display="radio" label="Run IDEAS only within defined windows in the input data"> 195 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero value has no affect"/>
221 <option value="no" selected="true">No</option> 196 <param name="initial_states" type="integer" value="20" min="1" label="Initial number of states" help="Zero value has no affect"/>
222 <option value="yes">Yes</option>
223 </param>
224 <when value="no"/>
225 <when value="yes">
226 <param name="window_start" type="integer" value="0" min="0" label="Window start" help="Zero-based"/>
227 <param name="window_end" type="integer" value="0" min="0" label="Window end" help="Zero-based"/>
228 </when>
229 </conditional>
230 <conditional name="log2_transformation_cond">
231 <param name="log2_transformation" type="select" label="Perform Log2-transformation of the input data">
232 <option value="no" selected="true">No</option>
233 <option value="yes">Yes</option>
234 </param>
235 <when value="no"/>
236 <when value="yes">
237 <param name="log2_num" type="float" value="0" min="0" max="1" label="Enter a number to use log2(x+num) transformation" help="Zero value has no affect"/>
238 </when>
239 </conditional>
240 <conditional name="max_states_inferred_cond">
241 <param name="max_states_inferred" type="select" label="Set the maximum number of states to be inferred">
242 <option value="no" selected="true">No</option>
243 <option value="yes">Yes</option>
244 </param>
245 <when value="no"/>
246 <when value="yes">
247 <param name="max_states" type="float" value="1" min="1" label="Maximum number of states to be inferred"/>
248 </when>
249 </conditional>
250 <conditional name="num_initial_states_cond">
251 <param name="num_initial_states" type="select" label="Set the initial number of states">
252 <option value="no" selected="true">No</option>
253 <option value="yes">Yes</option>
254 </param>
255 <when value="no"/>
256 <when value="yes">
257 <param name="initial_states" type="integer" value="20" min="1" label="Initial number of states"/>
258 </when>
259 </conditional>
260 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero value has no affect"/> 197 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero value has no affect"/>
261 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero value has no affect"/> 198 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero value has no affect"/>
262 <param name="prior_concentration" type="float" value="0" min="0" label="Prior concentration" help="Zero value results in the default value: sqrt(number of cell types)"/> 199 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default value: sqrt(number of cell types)"/>
263 <conditional name="burnin_max_steps_cond"> 200 <param name="burnin_num" type="integer" value="50" min="1" label="Number of burnin steps"/>
264 <param name="burnin_max_steps" type="select" label="Set the the number of burnin and maximization steps"> 201 <param name="mcmc_num" type="integer" value="50" min="1" label="Number of maximization steps"/>
265 <option value="no" selected="true">No</option> 202 <param name="min_standard_dev" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default value: 0.5"/>
266 <option value="yes">Yes</option> 203 <param name="max_standard_dev" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero value results in the default value: infinity"/>
267 </param>
268 <when value="no"/>
269 <when value="yes">
270 <param name="burnin_num" type="integer" value="50" min="1" label="Number of burnin steps"/>
271 <param name="mcmc_num" type="integer" value="50" min="1" label="Number of maximization steps"/>
272 </when>
273 </conditional>
274 <conditional name="set_min_standard_dev_cond">
275 <param name="set_min_standard_dev" type="select" label="Set the minimum standard deviation for the emission Gaussian distribution?">
276 <option value="no" selected="true">No</option>
277 <option value="yes">Yes</option>
278 </param>
279 <when value="no"/>
280 <when value="yes">
281 <param name="min_standard_dev" type="float" value="0.5" label="Minimum standard deviation for the emission Gaussian distribution"/>
282 </when>
283 </conditional>
284 <conditional name="set_max_standard_dev_cond">
285 <param name="set_max_standard_dev" type="select" label="Set the maximum standard deviation for the emission Gaussian distribution?">
286 <option value="no" selected="true">No</option>
287 <option value="yes">Yes</option>
288 </param>
289 <when value="no"/>
290 <when value="yes">
291 <param name="max_standard_dev" type="float" value="100000000" label="Maximum standard deviation for the emission Gaussian distribution"/>
292 </when>
293 </conditional>
294 </inputs> 204 </inputs>
295 <outputs> 205 <outputs>
296 <data name="output_log" format="txt" label="${tool.name} (ideas output log) on ${on_string}"/> 206 <data name="output_log" format="txt" label="${tool.name} (ideas output log) on ${on_string}"/>
297 <data name="output_cluster" format="txt" label="${tool.name} (local cell type clustering) on ${on_string}"/> 207 <data name="output_cluster" format="txt" label="${tool.name} (local cell type clustering) on ${on_string}"/>
298 <data name="output_para" format="tabular" label="${tool.name} (epigenetic state frequency, mean and variance parameters) on ${on_string}"/> 208 <data name="output_para" format="tabular" label="${tool.name} (epigenetic state frequency, mean and variance parameters) on ${on_string}"/>
333 * **Bed file specifying the genomic windows** - bed file specifying the genomic windows (if "Yes" is selected) 243 * **Bed file specifying the genomic windows** - bed file specifying the genomic windows (if "Yes" is selected)
334 244
335 **Other options** 245 **Other options**
336 246
337 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately. 247 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
338 * **Select Bed file containing blacklist regions for exclusion** - select a Bed file that contains regions you'd like excluded from your datasets. 248 * **Select file(s) containing regions to exclude** - select one or more bed files that contain regions you'd like excluded from your datasets.
339 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change. 249 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
340 250
341 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision. 251 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
342 * **Run IDEAS only within defined windows in the input data** - select "Yes" to Run IDEAS only in windows between zero-based start and end indexes of windows in the input data. 252 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number less than 1. For example, if your input data is mean read count per window, using 0.1 may produce better results.
343 * **Perform Log2-transformation of the input data** - select "Yes" to perform Log2-transformation of the input data by log2(x+1) (recommended for read count data to reduce skewness). You can optionally enter a number less than 1 to direct IDEAS to produce log2(x+num) transformation. For example, if your input data is mean read count per window, then 1 may be too large, but using 0.1 may be more reasonable. 253 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
344 * **Set the maximum number of states to be inferred** - select "Yes" to restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified 254 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
345 * **Set the initial number of states** - select "Yes" if the number of states you expect to generate is greater than 20. While IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
346 * **Maximum number of position classes to be inferred** - Set this value only if: 255 * **Maximum number of position classes to be inferred** - Set this value only if:
347 256
348 * you do not want position classes (e.g., for testing purposes), in this case set the value t0 1 257 * you do not want position classes (e.g., for testing purposes), in this case set the value to 1
349 * IDEAS runs slow because there are too many position classes, generally less than 100 position classes will run fine 258 * IDEAS runs slow because there are too many position classes, generally less than 100 position classes will run fine
350 259
351 * **Maximum number of cell type clusters allowed** - Set this value only for testing. If you set the value to 1, then all cell types will be clustered in one group. 260 * **Maximum number of cell type clusters allowed** - Set this value only for testing. If you set the value to 1, then all cell types will be clustered in one group.
352 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity. 261 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
353 * **Set the number of burnin and maximization steps** - specify the number of burnin and maximization steps; the default is 50 50. Increasing these two numbers will increase computing time and only slightly increase accuracy. Decreasing these two numbers will reduce computing time but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode. 262 * **Set the number of burnin and maximization steps** - specify the number of burnin and maximization steps; the default is 50 50. Increasing these two numbers will increase computing time and only slightly increase accuracy. Decreasing these two numbers will reduce computing time but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode.
354 * **Set the minimum standard deviation for the emission Gaussian distribution** - specify the minimum standard deviation for the emission Gaussian distribution. 263 * **Minimum standard deviation for the emission Gaussian distribution** - you should change the default value of 0.5 if the standard deviation of your data is much smaller or much larger than 1. The first line of the output produced by IDEAS is **ysd=xxx**, which is the total standard deviation of your data. If that value is less than 0.5, you may set the minimum standard deviation to an even smaller number (e.g., xxx/2). If the standard deviation of your data is much greater than 1, (e.g., 20), you may set the minimum standard deviation to a larger value, (e.g., 5). Modifying the minimum standard deviation in the former case is more necessary than in the latter case because otherwise you may end up finding no interesting segmentations. We do not recommend setting the minimum standard deviation to be 0 or smaller, as doing so may capture some artificial and uninteresting states due to tightly clustered data, such as 0 in read counts.
355 264 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
356 * **Minimum standard deviation for the emission Gaussian distribution** - you should change the default minerr value of 0.5 if the standard deviation of your data is much smaller or much larger than 1. The first line of the output produced by IDEAS is **ysd=xxx**, which is the total standard deviation of your data. If that value is less than 0.5, you may set the minimum standard deviation to an even smaller number (e.g., xxx/2). If the standard deviation of your data is much greater than 1, (e.g., 20), you may set the minimum standard deviation to a larger value, (e.g., 5). Modifying the minimum standard deviation in the former case is more necessary than in the latter case because otherwise you may end up finding no interesting segmentations. We do not recommend setting the minimum standard deviation to be 0 or smaller, as doing so may capture some artificial and uninteresting states due to tightly clustered data, such as 0 in read counts.
357
358 * **Set the maximum standard deviation for the emission Gaussian distribution** - specify the maximum standard deviation for the emission Gaussian distribution.
359
360 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
361 265
362 </help> 266 </help>
363 <citations> 267 <citations>
364 <citation type="doi">10.1093/nar/gkw278</citation> 268 <citation type="doi">10.1093/nar/gkw278</citation>
365 </citations> 269 </citations>