annotate ideas.xml @ 100:86445eab5f51 draft

Uploaded
author greg
date Mon, 16 Oct 2017 08:24:05 -0400
parents 585557b96a9a
children ad45c3ba16b2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d75cbb2db2c4 Uploaded
greg
parents: 0
diff changeset
1 <tool id="ideas" name="IDEAS" version="1.2.0">
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
2 <description>accounts for position dependent epigenetic events and detects local cell type relationships</description>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
3 <requirements>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="2.26.0">bedtools</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="332">ucsc-bedgraphtobigwig</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="332">ucsc-bedsort</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="332">ucsc-bigwigaverageoverbed</requirement>
100
86445eab5f51 Uploaded
greg
parents: 99
diff changeset
8 <requirement type="package" version="1.20">ideas</requirement>
20
a70690dcf9ff Uploaded
greg
parents: 14
diff changeset
9 <requirement type="package" version="1.3.2">r-optparse</requirement>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
10 </requirements>
30
1d854705de39 Uploaded
greg
parents: 29
diff changeset
11 <command detect_errors="exit_code"><![CDATA[
66
dc5d5a08c370 Uploaded
greg
parents: 65
diff changeset
12 #set tmp_dir = "tmp"
72
6dece3e06a10 Uploaded
greg
parents: 71
diff changeset
13 #set prep_input_config = "prep_input_config.txt"
66
dc5d5a08c370 Uploaded
greg
parents: 65
diff changeset
14 #set prep_output_config = "prep_output_config.txt"
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
15 ##############################################
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
16 ## Create the config file and prepare the data
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
17 ##############################################
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
18 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
19 cp '$gen_prep_input_config' $prep_input_config &&
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
20 prepMat
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
21 $prep_input_config
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
22 #if str($specify_genomic_window) == "yes":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
23 -bed '$specify_genomic_window_cond.bed_input'
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
24 #else:
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
25 -gsz '$chromInfo'
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
26 -wsz $specify_genomic_window_cond.window_size
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
27 #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
28 #if str($restrict_chromosomes) == "yes":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
29 #set chroms = []
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
30 #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
31 #for $i in $chrom_repeat.chrom
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
32 $chroms.append($i)
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
33 #end for
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
34 -chr #echo ",".join(map(str, $chroms))#
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
35 #end if
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
36 #end if
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
37 $bychr
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
38 -c $reads_per_bp
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
39 #if str($blacklist_input) not in ["None", ""]:
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
40 -exclude '$blacklist_input'
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
41 #end if
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
42 $norm
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
43 ##############################################
29
06522a4aae6d Uploaded
greg
parents: 28
diff changeset
44 ## Coerce the prepMat config output to the
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
45 ## format expected by IDEAS.
26
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
46 ##############################################
65
e29e083ebef7 Uploaded
greg
parents: 64
diff changeset
47 && cut -d' ' $prep_input_config -f1,2 > file1.txt
29
06522a4aae6d Uploaded
greg
parents: 28
diff changeset
48 && ls tmp/*.bed.gz > file2.txt
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
49 && paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
50 ##############################################
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
51 ## Run IDEAS
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
52 ##############################################
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
53 && ideas
25
0169856fe7bb Uploaded
greg
parents: 24
diff changeset
54 '$prep_output_config'
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
55 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
56 #if str($specify_genomic_window) == "yes":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
57 '$specify_genomic_window_cond.bed_input'
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
58 #else:
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
59 $tmp_dir/*.bed
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
60 #end if
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
61 $hp
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
62 #if str($log2) != "0.0":
47
3a4697f71a05 Uploaded
greg
parents: 46
diff changeset
63 -log2 $log2
3a4697f71a05 Uploaded
greg
parents: 46
diff changeset
64 #end if
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
65 #if str($max_states) != "0.0":
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
66 -G $max_states
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
67 #end if
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
68 #if str($initial_states) != "0":
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
69 -C $initial_states
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
70 #end if
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
71 #if str($max_position_classes) != "0":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
72 -P $max_position_classes
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
73 #end if
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
74 #if str($max_cell_type_clusters) != "0":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
75 -K $max_cell_type_clusters
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
76 #end if
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
77 #if str($prior_concentration) != "0.0":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
78 -A $prior_concentration
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
79 #end if
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
80 -sample $burnin_num $mcmc_num
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
81 #if str($minerr) != "0.0":
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
82 -minerr $minerr
42
46445e3dc9e2 Uploaded
greg
parents: 41
diff changeset
83 #end if
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
84 #if str($maxerr) != "0.0":
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
85 -maxerr $maxerr
42
46445e3dc9e2 Uploaded
greg
parents: 41
diff changeset
86 #end if
93
0c2cf49dfb58 Uploaded
greg
parents: 91
diff changeset
87 -rseed $rseed
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
88 -thread \${GALAXY_SLOTS:-4}
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
89 #if str($save_ideas_log) == "yes":
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
90 > $output_log
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
91 #else:
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
92 > /dev/null
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
93 #end if
38
3421fff414de Uploaded
greg
parents: 37
diff changeset
94 && mv ./*.cluster $output_cluster
3421fff414de Uploaded
greg
parents: 37
diff changeset
95 && mv ./*.para $output_para
3421fff414de Uploaded
greg
parents: 37
diff changeset
96 && mv ./*.profile $output_profile
3421fff414de Uploaded
greg
parents: 37
diff changeset
97 && mv ./*.state $output_state
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
98 ]]></command>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
99 <configfiles>
82
f6156d2888a3 Uploaded
greg
parents: 81
diff changeset
100 <configfile name="gen_prep_input_config"><![CDATA[#import os
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
101 #if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
102 #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
103 #for $i in $cell_type_epigenetic_factor_cond.input:
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
104 #set file_name_with_ext = $i.name
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
105 #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
106 #set file_name = $file_name_with_ext.split(".")[0]
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
107 #if str($input_name_positions) == "cell_first":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
108 #set cell_type_name = $file_name.split("-")[0]
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
109 #set epigenetic_factor_name = $file_name.split("-")[1]
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
110 #else:
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
111 #set cell_type_name = $file_name.split("-")[1]
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
112 #set epigenetic_factor_name = $file_name.split("-")[0]
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
113 #end if
79
20d4dc694105 Uploaded
greg
parents: 78
diff changeset
114 ${cell_type_name} ${epigenetic_factor_name} ${i}
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
115 #end for
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
116 #else:
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
117 #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
80
94182e72e08d Uploaded
greg
parents: 79
diff changeset
118 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
119 #end for
71
4afd642aa3ce Uploaded
greg
parents: 70
diff changeset
120 #end if]]></configfile>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
121 </configfiles>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
122 <inputs>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
123 <conditional name="cell_type_epigenetic_factor_cond">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
124 <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
125 <option value="extract" selected="true">extracting them from the selected input file names</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
126 <option value="manual">manually setting them for each selected input</option>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
127 </param>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
128 <when value="extract">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
129 <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
130 <validator type="empty_field"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
131 <validator type="unspecified_build"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
132 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
133 <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
134 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
135 <option value="cell_last">Epigenetic factor name - Cell type name</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
136 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
137 </when>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
138 <when value="manual">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
139 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
140 <param name="cell_type_name" type="text" value="" label="Cell type name">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
141 <validator type="empty_field"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
142 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
143 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
144 <validator type="empty_field"/>
32
58f5b2af9473 Uploaded
greg
parents: 31
diff changeset
145 </param>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
146 <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
147 <validator type="empty_field"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
148 <validator type="unspecified_build"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
149 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
150 </repeat>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
151 </when>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
152 </conditional>
99
585557b96a9a Uploaded
greg
parents: 98
diff changeset
153 <param argument="-rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
154 <conditional name="specify_genomic_window_cond">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
155 <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
156 <option value="no" selected="true">No</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
157 <option value="yes">Yes</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
158 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
159 <when value="no">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
160 <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
161 <conditional name="restrict_chromosomes_cond">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
162 <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
163 <option value="no" selected="true">No</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
164 <option value="yes">Yes</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
165 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
166 <when value="no"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
167 <when value="yes">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
168 <repeat name="chrom_repeat" title="Chromosomes" min="1">
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
169 <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/>
68
5fb237a06499 Uploaded
greg
parents: 67
diff changeset
170 </repeat>
5fb237a06499 Uploaded
greg
parents: 67
diff changeset
171 </when>
5fb237a06499 Uploaded
greg
parents: 67
diff changeset
172 </conditional>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
173 </when>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
174 <when value="yes">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
175 <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
176 </when>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
177 </conditional>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
178 <param argument="-bychr" type="boolean" truevalue="-bychr" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
179 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
36
859687afe7bc Uploaded
greg
parents: 35
diff changeset
180 <option value="6" selected="true">mean</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
181 <option value="8">max</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
182 </param>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
183 <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
184 <param argument="-norm" type="boolean" truevalue="-norm" falsevalue="" checked="False" label="Standardize all datasets"/>
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
185 <param argument="-hp" type="boolean" truevalue="-hp" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
47
3a4697f71a05 Uploaded
greg
parents: 46
diff changeset
186 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero value has no effect"/>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
187 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero value has no effect"/>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
188 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Zero value has no effect"/>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
189 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero value has no effect"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
190 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero value has no effect"/>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
191 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default value: sqrt(number of cell types)"/>
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
192 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
193 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
194 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default value: 0.5"/>
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
195 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero value results in the default value: 1000000"/>
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
196 <param name="save_ideas_log" type="select" display="radio" label="Save IDEAS log in an additional history item">
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
197 <option value="no" selected="true">No</option>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
198 <option value="yes">Yes</option>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
199 </param>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
200 </inputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
201 <outputs>
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
202 <data name="output_log" format="txt" label="${tool.name} (ideas output log) on ${on_string}">
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
203 <filter>save_ideas_log == 'yes'</filter>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
204 </data>
26
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
205 <data name="output_cluster" format="txt" label="${tool.name} (local cell type clustering) on ${on_string}"/>
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
206 <data name="output_para" format="tabular" label="${tool.name} (epigenetic state frequency, mean and variance parameters) on ${on_string}"/>
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
207 <data name="output_profile" format="txt" label="${tool.name} (profile) on ${on_string}"/>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
208 <data name="output_state" format="txt" label="${tool.name} (epigenetic states and position classes) on ${on_string}"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
209 </outputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
210 <tests>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
211 <test>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
212 <param name="cell_type_epigenetic_factor" value="extract"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
213 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
214 <param name="input_name_positions" value="cell_first"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
215 <param name="specify_genomic_window" value="yes"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
216 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
217 <output name="output_state" file="output_state.txt" ftype="txt"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
218 <output name="output_profile" file="output_profile.txt" ftype="txt"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
219 <output name="output_para" file="output_para.tabular" ftype="tabular"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
220 <output name="output_cluster" file="output_cluster.txt" ftype="txt"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
221 </test>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
222 <test>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
223 <param name="cell_type_epigenetic_factor" value="manual"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
224 <repeat name="input_repeat">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
225 <param name="cell_type_name" value="e001" />
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
226 <param name="epigenetic_factor_name" value="h3k4me3"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
227 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
228 </repeat>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
229 <param name="specify_genomic_window" value="yes"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
230 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
231 <output name="output_state" file="output_state.txt" ftype="txt"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
232 <output name="output_profile" file="output_profile.txt" ftype="txt"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
233 <output name="output_para" file="output_para.tabular" ftype="tabular"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
234 <output name="output_cluster" file="output_cluster.txt" ftype="txt"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
235 </test>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
236 </tests>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
237 <help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
238 **What it does**
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
239
95
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
240 IDEAS (an **I**\ ntegrative and **D**\ iscriminative **E**\ pigenome **A**\ nnotation **S**\ ystem) identifies
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
241 de novo regulatory functions from epigenetic data in multiple cell types jointly. It is a full probabilistic
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
242 model defined on all data, and it combines signals across both the genome and cell types to boost power. The
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
243 underlying assumption of IDEAS is that, because all cell types share the same underlying DNA sequences,
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
244 **functions of each DNA segment should be correlated**. Also, cell type specific regulation is locus-dependent,
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
245 and thus IDEAS uses local epigenetic landscape to **identify de novo and local cell type clusters** without
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
246 assuming or requiring a known global cell type relationship.
94
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
247
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
248 IDEAS takes as input a list of epigenetic data sets (histones, chromatin accessibility, CpG methylation, TFs, etc)
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
249 or any other whole-genome data sets (e.g., scores). Currently the supported data formats include BigWig and BAM.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
250 All data sets will first be mapped by IDEAS to a common genomic coordinate in a selected assembly (200bp windows
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
251 by default, or user-provided). The user can specify regions to be considered or removed from the analysis. The
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
252 input data may come from one cell type/condition/individual/time point (although it does not fully utilize the
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
253 advantage of IDEAS), or from multiple cell types/conditions/individuals/time points. The same set of epigenetic
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
254 features may not be present in all cell types, for which IDEAS will do imputation of the missing tracks if
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
255 specified.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
256
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
257 .. image:: $PATH_TO_IMAGES/ideas.png
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
258
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
259 IDEAS predicts regulatory functions, denoted by epigenetic states, at each position in each cell type by
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
260 **combining information simultaneously learned from other cell types** at the same positions in cell types with
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
261 similar local epigenetic landscapes. The sizes of the genomic intervals used for determining the similarity are also learned.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
262 All of the inferences are done through parallel infinite-state hidden Markov models (iHMM), which is a Bayesian
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
263 non-parametric technique to automatically determine the number of local cell type clusters and the number of
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
264 epigenetic states.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
265
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
266 In addition to its improved power, IDEAS has two unique advantages:
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
267
96
90144b60033c Uploaded
greg
parents: 95
diff changeset
268 1) applies **linear time inference** with respect to the number of cell types, which allows it to study hundreds or more cell types jointly
90144b60033c Uploaded
greg
parents: 95
diff changeset
269 2) uses mini-batch training to **improve reproducibility** of the predicted epigenetic states, which is important because genome segmentation is not convex and hence cannot guarantee a global optimal solution.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
270
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
271 -----
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
272
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
273 **Options**
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
274
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
275 * **Set cell type and epigenetic factor names by** - cell type and epigenetic factor names can be set manually or by extracting them from the names of the selected input datasets. The latter case requires all selected datasets to have names that contain a "-" character.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
276
93
0c2cf49dfb58 Uploaded
greg
parents: 91
diff changeset
277 * **BAM or BigWig files** - select one or more Bam or Bigwig files from your history, making sure that the name of every selected input includes a "-" character (e.g., e001-h3k4me3.bigwig).
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
278 * **Cell type, Epigenetic factor and Input** - manually select any number of inputs, setting the cell type and epigenetic factor name for each. The combination of "cell type name" and "epigenetic factor name" must be unique for each input. For example, if you have replicate data you may want to specify the cell name as "rep1", "rep2", etc and the factor name as "rep1", "rep2", etc.
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
279
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
280 * **Cell type name** - cell type name
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
281 * **Epigenetic factor name** - epigenetic factor name
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
282 * **BAM or BigWig file** - BAM or BigWig file
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
283
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
284 * **Seed for IDEAS model initialization** - enter an integer to be used as the seed for the IDEAS model initialization. A zero value causes IDEAS to automatically generate a random seed, and this seed will be different for each job run.
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
285 * **Select Bed file that defines genomic windows on which to process the data** - if "No" is selected, IDEAS will run whole genome segmentation. If "Yes" is selected, IDEAS will segment genomes in the unit of the windows defined by the bed file. This file can be in BED3, BED4 or BED5 format, but only the first three columns (chr posst posed, i.e., chromosome, start position and end position) will be used.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
286
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
287 * **Window size in base pairs** - Window size in base pairs (if "No" is selected)
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
288 * **Restrict processing to specified chromosomes** - If "Yes" is selected, processing will be restricted to specified chromosomes
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
289
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
290 * **Chromosomes** - processing will be restricted to specified chromosomes (if "Yes" is selected)
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
291
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
292 * **Bed file specifying the genomic windows** - bed file specifying the genomic windows (if "Yes" is selected)
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
293
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
294 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
295 * **Calculate the signal in each genomic window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each genomic window.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
296 * **Select file(s) containing regions to exclude** - select one or more bed files that contain regions you'd like excluded from your datasets.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
297 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
298
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
299 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
300 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number that is representative of the noise level in your data (e.g., a number less than 1). If this number is at a similar scale or larger than the signal in your data, it will lose power. For example, if your input data is mean read count per window, using 0.1 may produce better results.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
301 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
302 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
303 * **Maximum number of position classes to be inferred** - Set this value only if:
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
304
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
305 * you do not want position classes (e.g., for testing purposes); in this case set the value to 1
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
306 * IDEAS runs slowly because there are too many position classes; generally, fewer than 100 position classes will run fine
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
307
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
308 * **Maximum number of cell type clusters allowed** - If you set the value to 1, then all cell types will be clustered in one group, which may be desirable if all cell types are homogeneous and you want IDEAS to use information in all cell types equally.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
309 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
310 * **Number of burnin steps** - specify the number of burnin steps; default is 20. Increasing the burnin and maximization steps will increase computing time and only slightly increase accuracy, while decreasing them will reduce computing resources but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode.
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
311 * **Number of maximization steps** - specify the number of maximization steps; default is 20.
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
312 * **Minimum standard deviation for the emission Gaussian distribution** - This number multiplied by the overall standard deviation of your data will be used as a lower bound for the standard deviation for each factor in each epigenetic state (the default is 0.5). This number is useful for removing very subtle clusters in the data. Setting this value near 0 will allow IDEAS to discover many subtle states, while setting it greater than 1 will result in IDEAS losing the ability to detect meaningful states.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
313 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
314
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
315 </help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
316 <citations>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
317 <citation type="doi">10.1093/nar/gkw278</citation>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
318 </citations>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
319 </tool>