annotate ideas.xml @ 0:b785bcfe5cd0 draft default tip

Uploaded
author greg
date Mon, 12 Feb 2018 09:52:26 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
1 <tool id="ideas" name="IDEAS" version="1.2.0">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
2 <description>accounts for position dependent epigenetic events and detects local cell type relationships</description>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
3 <requirements>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="2.26.0">bedtools</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="332">ucsc-bedgraphtobigwig</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="332">ucsc-bedsort</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="332">ucsc-bigwigaverageoverbed</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
8 <requirement type="package" version="1.20">ideas</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
9 <requirement type="package" version="1.10.4">r-data.table</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
10 <requirement type="package" version="1.4.4">r-optparse</requirement>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
11 </requirements>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
12 <command detect_errors="exit_code"><![CDATA[
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
13 #import os
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
14 #set perform_training = $perform_training_cond.perform_training
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
15
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
16 ## Extract the input's compressed tmp directory archive.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
17 tar -xzf '$input.metadata.tmp_archive' &&
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
18
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
19 ## Define and create output directories.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
20 #set output_pdf_dir = "output_pdf_dir"
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
21 #set output_txt_dir = "output_txt_dir"
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
22 #set output_training_dir = "output_training_dir"
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
23 #if str($output_heatmaps) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
24 mkdir '$output_pdf_dir' &&
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
25 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
26 #if str($perform_training) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
27 #set output_dir = $output_training_dir
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
28 mkdir '$output_training_dir' &&
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
29 #else:
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
30 #set output_dir = $output_txt_dir
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
31 mkdir '$output_txt_dir' &&
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
32 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
33
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
34 Rscript '$__tool_directory__/ideas.R'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
35 --burnin_num $burnin_num
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
36 #if str($bychr) == "true":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
37 --bychr true
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
38 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
39 #if str($input.metadata.chrom_bed) not in ['', 'None']:
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
40 --chrom_bed_input '$input.metadata.chrom_bed'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
41 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
42 #if str($input.metadata.chrom_windows) not in ['', 'None']:
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
43 --chromosome_windows '$input.metadata.chrom_windows'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
44 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
45 #if str($hp) == "true":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
46 --hp true
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
47 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
48 #if str($initial_states) != "0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
49 --initial_states $initial_states
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
50 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
51 --ideas_input_config '$input.metadata.input_config'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
52 #if str($log2) != "0.0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
53 --log2 $log2
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
54 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
55 #if str($maxerr) != "0.0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
56 --maxerr $maxerr
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
57 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
58 #if str($max_cell_type_clusters) != "0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
59 --max_cell_type_clusters $max_cell_type_clusters
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
60 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
61 #if str($max_position_classes) != "0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
62 --max_position_classes $max_position_classes
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
63 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
64 #if str($max_states) != "0.0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
65 --max_states $max_states
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
66 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
67 --mcmc_num $mcmc_num
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
68 #if str($minerr) != "0.0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
69 --minerr $minerr
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
70 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
71 --output_dir $output_dir
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
72 #if str($prior_concentration) != "0.0":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
73 --prior_concentration $prior_concentration
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
74 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
75 --project_name '$project_name'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
76 #if str($save_ideas_log) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
77 --save_ideas_log $save_ideas_log
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
78 --output_log '$output_log'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
79 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
80 #if str($standardize_datasets) == "true":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
81 --standardize_datasets true
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
82 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
83 --rseed $rseed
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
84 --thread \${GALAXY_SLOTS:-4}
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
85 #if str($perform_training) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
86 --training_iterations $perform_training_cond.training_iterations
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
87 --training_windows $perform_training_cond.training_windows
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
88 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
89 #if str($perform_training) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
90 && mv ./*.para0 '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
91 && mv ./*.profile0 '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
92 #else:
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
93 && mv ./*.para '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
94 && mv ./*.profile '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
95 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
96 && mv ./*.cluster '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
97 && mv ./*.state '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
98 #if str($output_heatmaps) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
99 && Rscript '$__tool_directory__/create_heatmaps.R'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
100 --input_dir '$output_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
101 --output_dir '$output_pdf_dir'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
102 --script_dir '$__tool_directory__'
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
103 #if str($perform_training) == "yes":
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
104 --in_training_mode true
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
105 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
106 #end if
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
107 ]]></command>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
108 <inputs>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
109 <conditional name="perform_training_cond">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
110 <param name="perform_training" type="select" label="Perform training?">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
111 <option value="yes" selected="true">Yes</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
112 <option value="no">No</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
113 </param>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
114 <when value="yes">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
115 <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
116 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
117 </when>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
118 <when value="no"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
119 </conditional>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
120 <param name="input" type="data" format="ideaspre" label="Select IDEAS input"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
121 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
122 <validator type="empty_field"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
123 </param>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
124 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
125 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
126 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each window using">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
127 <option value="6" selected="true">mean</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
128 <option value="8">max</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
129 </param>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
130 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
131 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
132 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
133 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
134 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
135 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
136 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
137 <param name="standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
138 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
139 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
140 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
141 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
142 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
143 <option value="yes" selected="true">Yes</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
144 <option value="no">No</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
145 </param>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
146 <param name="save_ideas_log" type="select" display="radio" label="Save IDEAS log in an additional history item">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
147 <option value="no" selected="true">No</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
148 <option value="yes">Yes</option>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
149 </param>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
150 </inputs>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
151 <outputs>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
152 <data name="output_log" format="txt" label="${tool.name} (output log) on ${on_string}">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
153 <filter>save_ideas_log == 'yes'</filter>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
154 </data>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
155 <collection name="output_pdf_collection" type="list" label="${tool.name} (heatmaps) on ${on_string}">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
156 <discover_datasets pattern="__name__" directory="output_pdf_dir" format="pdf"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
157 <filter>output_heatmaps == 'yes'</filter>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
158 </collection>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
159 <collection name="output_txt_collection" type="list">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
160 <discover_datasets pattern="__name__" directory="output_txt_dir" format="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
161 <filter>perform_training_cond['perform_training'] == 'no'</filter>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
162 </collection>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
163 <collection name="output_training_collection" type="list">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
164 <discover_datasets pattern="__name__" directory="output_training_dir" format="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
165 <filter>perform_training_cond['perform_training'] == 'yes'</filter>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
166 </collection>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
167 </outputs>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
168 <tests>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
169 <test>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
170 <param name="perform_training" value="yes"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
171 <param name="training_iterations" value="3"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
172 <param name="input" value="ideas_test1/input.html" dbkey="hg19" ftype="ideaspre">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
173 <!--
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
174 The order is critical here - it must be the same as is displayed on the upload form!
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
175 Also, there seems to be a bug with the composite upload form tab. All datasets must
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
176 be selected whether they are optional or not. Here the chromosome_windows.txt file
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
177 was generated during a manual execution of ideas_preprocessor tool, specifying chrom
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
178 windows.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
179 -->
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
180 <composite_data value='ideas_test1/chromosome_windows.txt'/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
181 <composite_data value='ideas_test1/chromosomes.bed'/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
182 <composite_data value='ideas_test1/IDEAS_input_config.txt'/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
183 <composite_data value='ideas_test1/tmp.tar.gz'/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
184 </param>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
185 <param name="output_heatmaps" value="yes"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
186 <param name="project_name" value="IDEAS_out"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
187 <output_collection name="output_training_collection" type="list">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
188 <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.chr1.cluster" ftype="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
189 <element name="IDEAS_out.chr2.cluster" file="IDEAS_out.chr2.cluster" ftype="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
190 <element name="IDEAS_out.chr1.state" file="IDEAS_out.chr1.state" ftype="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
191 <element name="IDEAS_out.chr2.state" file="IDEAS_out.chr2.state" ftype="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
192 <element name="IDEAS_out.para0" file="IDEAS_out.para0" ftype="txt"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
193 <element name="IDEAS_out.profile0" file="IDEAS_out.profile0" ftype="txt" compare="contains"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
194 </output_collection>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
195 <output_collection name="output_pdf_collection" type="list">
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
196 <element name="IDEAS_out.state.1.pdf" file="IDEAS_out.state.1.pdf" ftype="pdf" compare="contains"/>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
197 </output_collection>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
198 </test>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
199 </tests>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
200 <help>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
201 **What it does**
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
202
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
203 IDEAS (an **I**\ ntegrative and **D**\ iscriminative **E**\ pigenome **A**\ nnotation **S**\ ystem) identifies
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
204 de novo regulatory functions from epigenetic data in multiple cell types jointly. It is a full probabilistic
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
205 model defined on all data, and it combines signals across both the genome and cell types to boost power. The
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
206 underlying assumption of IDEAS is that, because all cell types share the same underlying DNA sequences,
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
207 **functions of each DNA segment should be correlated**. Also, cell type specific regulation is locus-dependent,
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
208 and so IDEAS uses local epigenetic landscape to **identify de novo and local cell type clusters** without
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
209 assuming or requiring a known global cell type relationship.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
210
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
211 The input is a single dataset with the **IdeasPre** datatype, which is produced by the IDEAS Preprocessor tool.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
212
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
213 .. image:: $PATH_TO_IMAGES/ideas.png
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
214
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
215 IDEAS predicts regulatory functions, denoted by epigenetic states, at each position in each cell type by
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
216 **combining information simultaneously learned from other cell types** at the same positions in cell types with
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
217 similar local epigenetic landscapes. Size of genomic intervals for determining the similarity are also learned.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
218 All of the inferences are done through parallel infinite-state hidden Markov models (iHMM), which is a Bayesian
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
219 non-parametric technique to automatically determine the number of local cell type clusters and the number of
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
220 epigenetic states.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
221
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
222 In addition to its improved power, IDEAS has two unique advantages:
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
223
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
224 1) applies **linear time inference** with respect to the number of cell types, which allows it to study hundreds or more cell types jointly
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
225 2) uses mini-batch training to **improve reproducibility** of the predicted epigenetic states, which is important because genome segmentation is not convex and hence cannot guarantee a global optimal solution.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
226
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
227 -----
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
228
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
229 **Options**
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
230
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
231 * **Perform training** - select "Yes" to run the specified number of training iterations, running IDEAS with the parameter values and producing outputs. After training, these outputs are combined into a single dataset which is then used in conjunction with the inputs for the actual analysis. This process improves the accuracy of the final results.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
232
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
233 * **Number of training iterations** - the number of times to execute IDEAS with the specified parameter values on the selected inputs to produce the training results. The minimum number of iterations is 3.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
234 * **Number of randomly selected windows for training** - the number of chromosome windows within the input datasets from which to randomly select data for training.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
235
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
236 * **Set cell type and epigenetic factor names by** - cell type and epigenetic factor names can be set manually or by extracting them from the names of the selected input datasets. The latter case requires all selected datasets to have names that contain a "-" character.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
237
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
238 * **BAM or BigWig files** - select one or more BAM or BigWig files from your history, making sure that the name of every selected input includes a "-" character (e.g., e001-h3k4me3.bigwig).
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
239 * **Cell type, Epigenetic factor and Input** - manually select any number of inputs, setting the cell type and epigenetic factor name for each. The combination of "cell type name" and "epigenetic factor name" must be unique for each input. For example, if you have replicate data you may want to specify the cell name as "rep1", "rep2", etc and the factor name as "rep1", "rep2", etc.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
240
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
241 * **Cell type name** - cell type name
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
242 * **Epigenetic factor name** - epigenetic factor name
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
243 * **BAM or BigWig file** - BAM or BigWig file
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
244
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
245 * **Project name** - datasets produced by IDEAS will have this base name.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
246 * **Seed for IDEAS model initialization** - enter an integer to be used as the seed for the IDEAS model initialization. A zero value causes IDEAS to automatically generate a random seed, and this seed will be different for each job run.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
247 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
248 * **Calculate the signal in each window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each window.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
249 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
250 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
251 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number that is representative of the noise level in your data (e.g., a number less than 1). If this number is at a similar scale or larger than the signal in your data, it will lose power. For example, if your input data is mean read count per window, using 0.1 may produce better results.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
252 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
253 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
254 * **Maximum number of position classes to be inferred** - Set this value only if:
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
255
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
256 * you do not want position classes (e.g., for testing purposes), in this case set the value to 1
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
257 * IDEAS runs slowly because there are too many position classes; generally, fewer than 100 position classes will run fine
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
258
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
259 * **Maximum number of cell type clusters allowed** - If you set the value to 1, then all cell types will be clustered in one group, which may be desirable if all cell types are homogeneous and you want IDEAS to use information in all cell types equally.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
260 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
261 * **Number of burnin steps** - specify the number of burnin steps; default is 20. Increasing the burnin and maximization steps will increase computing and only slightly increase accuracy, while decreasing them will reduce computing resources but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
262 * **Number of maximization steps** - specify the number of maximization steps; default is 20.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
263 * **Minimum standard deviation for the emission Gaussian distribution** - This number multiplied by the overall standard deviation of your data will be used as a lower bound for the standard deviation for each factor in each epigenetic state (the default is 0.5). This number is useful for removing very subtle clusters in the data. Setting this value near 0 will allow IDEAS to discover many subtle states, while setting it greater than 1 will result in IDEAS losing the ability to detect meaningful states.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
264 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
265 * **Output heatmaps** - select "Yes" to produce an additional dataset collection consisting of PDF datasets, one for each dataset with a .para extension in the primary IDEAS output dataset collection.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
266 * **Save IDEAS log in an additional history item** - select "Yes" to produce an additional history item that contains the entire IDEAS processing log.
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
267 </help>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
268 <citations>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
269 <citation type="doi">10.1093/nar/gkw278</citation>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
270 </citations>
b785bcfe5cd0 Uploaded
greg
parents:
diff changeset
271 </tool>