annotate ideas.xml @ 61:d89f1a065d5c draft

Uploaded
author greg
date Wed, 23 Aug 2017 15:11:44 -0400
parents ec0e85a08def
children cf0fbd58feb4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d75cbb2db2c4 Uploaded
greg
parents: 0
diff changeset
1 <tool id="ideas" name="IDEAS" version="1.2.0">
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
2 <description>accounts for position dependent epigenetic events and detects local cell type relationships</description>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
3 <requirements>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="2.26.0">bedtools</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="332">ucsc-bedgraphtobigwig</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="332">ucsc-bedsort</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="332">ucsc-bigwigaverageoverbed</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
8 <requirement type="package" version="1.2.0">ideas</requirement>
20
a70690dcf9ff Uploaded
greg
parents: 14
diff changeset
9 <requirement type="package" version="1.3.2">r-optparse</requirement>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
10 </requirements>
30
1d854705de39 Uploaded
greg
parents: 29
diff changeset
11 <command detect_errors="exit_code"><![CDATA[
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
12 #set tmp_dir = "tmp"
60
ec0e85a08def Uploaded
greg
parents: 59
diff changeset
13 #set prep_input_config = "prep_input_config.txt"
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
14 #set prep_output_config = "prep_output_config.txt"
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
15 ##############################################
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
16 ## Create the config file and prepare the data
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
17 ##############################################
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
18 #set input_type = $input_type_cond.input_type
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
19 #if str($input_type) == "datasets":
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
20 #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
21 #set cell_type_epigenetic_factor = $cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
22 #if str($cell_type_epigenetic_factor) == "extract":
60
ec0e85a08def Uploaded
greg
parents: 59
diff changeset
23 cp '$extract_prep_input_config' $prep_input_config &&
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
24 #else:
60
ec0e85a08def Uploaded
greg
parents: 59
diff changeset
25 cp '$manual_prep_input_config' $prep_input_config &&
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
26 #end if
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
27 #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
28 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
49
af8bf2b76697 Uploaded
greg
parents: 48
diff changeset
29 prepMat
60
ec0e85a08def Uploaded
greg
parents: 59
diff changeset
30 $prep_input_config
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
31 #if str($specify_genomic_window) == "yes":
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
32 -bed '$specify_genomic_window_cond.bed_input'
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
33 #else:
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
34 -gsz '$chromInfo'
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
35 -wsz $specify_genomic_window_cond.window_size
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
36 #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
37 #if str($restrict_chromosomes) == "yes":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
38 #set chroms = []
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
39 #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
## Gather the chromosome name from each repeat entry, then pass them to -chr as one comma-separated value.
40 #for $i in $chrom_repeat
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
41 #silent $chroms.append(str($i.chrom))
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
42 #end for
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
43 -chr '#echo ",".join($chroms)#'
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
44 #end if
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
45 #end if
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
46 #end if
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
47 $bychr
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
48 -c $reads_per_bp
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
49 #if str($blacklist_input) not in ["None", ""]:
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
50 -exclude '$blacklist_input'
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
51 #end if
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
52 $norm
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
53 ##############################################
29
06522a4aae6d Uploaded
greg
parents: 28
diff changeset
54 ## Coerce the prepMat config output to the
26
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
55 ## format expected by the R matrix builder.
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
56 ##############################################
60
ec0e85a08def Uploaded
greg
parents: 59
diff changeset
57 && cut -d' ' $prep_input_config -f1,2 > file1.txt
29
06522a4aae6d Uploaded
greg
parents: 28
diff changeset
58 && ls tmp/*.bed.gz > file2.txt
26
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
59 && paste <(cat file1.txt) <(cat file2.txt) > $prep_output_config
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
60 ##############################################
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
61 ## Build the R matrix from the prepMat output
9874800487e6 Uploaded
greg
parents: 1
diff changeset
62 ##############################################
25
0169856fe7bb Uploaded
greg
parents: 24
diff changeset
63 ##&& Rscript '$__tool_directory__/build_matrix.R'
0169856fe7bb Uploaded
greg
parents: 24
diff changeset
64 ##-i $tmp_dir/*.bed.gz
0169856fe7bb Uploaded
greg
parents: 24
diff changeset
65 ##-o $ideas_matrix_input_file
0169856fe7bb Uploaded
greg
parents: 24
diff changeset
66 ##-w $ideas_input_dir
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
67 ##############################################
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
68 ## Run IDEAS
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
69 ##############################################
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
70 && ideas
25
0169856fe7bb Uploaded
greg
parents: 24
diff changeset
71 '$prep_output_config'
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
72 #if str($input_type) == "datasets":
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
73 #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
74 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
75 #if str($specify_genomic_window) == "yes":
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
76 '$specify_genomic_window_cond.bed_input'
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
77 #else:
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
78 $tmp_dir/*.bed
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
79 #end if
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
80 #else:
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
81 $tmp_dir/*.bed
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
82 #end if
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
83 $hp
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
84 #if str($log2) != "0.0":
47
3a4697f71a05 Uploaded
greg
parents: 46
diff changeset
85 -log2 $log2
3a4697f71a05 Uploaded
greg
parents: 46
diff changeset
86 #end if
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
87 #if str($max_states) != "0.0":
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
88 -G $max_states
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
89 #end if
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
90 #if str($initial_states) != "0":
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
91 -C $initial_states
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
92 #end if
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
93 #if str($max_position_classes) != "0":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
94 -P $max_position_classes
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
95 #end if
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
96 #if str($max_cell_type_clusters) != "0":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
97 -K $max_cell_type_clusters
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
98 #end if
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
99 #if str($prior_concentration) != "0.0":
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
100 -A $prior_concentration
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
101 #end if
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
102 -sample $burnin_num $mcmc_num
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
103 #if str($minerr) != "0.0":
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
104 -minerr $minerr
42
46445e3dc9e2 Uploaded
greg
parents: 41
diff changeset
105 #end if
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
106 #if str($maxerr) != "0.0":
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
107 -maxerr $maxerr
42
46445e3dc9e2 Uploaded
greg
parents: 41
diff changeset
108 #end if
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
109 -thread \${GALAXY_SLOTS:-4}
28
91dc2a532890 Uploaded
greg
parents: 27
diff changeset
110 > $output_log
38
3421fff414de Uploaded
greg
parents: 37
diff changeset
111 && mv ./*.cluster $output_cluster
3421fff414de Uploaded
greg
parents: 37
diff changeset
112 && mv ./*.para $output_para
3421fff414de Uploaded
greg
parents: 37
diff changeset
113 && mv ./*.profile $output_profile
3421fff414de Uploaded
greg
parents: 37
diff changeset
114 && mv ./*.state $output_state
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
115 ]]></command>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
116 <configfiles>
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
117 <configfile name="manual_prep_input_config"><![CDATA[
50
14ac679f3c4c Uploaded
greg
parents: 49
diff changeset
118 #if $input_type_cond.cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor == "manual":
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
119 #for $input_items in $input_type_cond.cell_type_epigenetic_factor_cond.input_repeat:
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
120 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
121 #end for
50
14ac679f3c4c Uploaded
greg
parents: 49
diff changeset
122 #end if]]></configfile>
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
123 <configfile name="extract_prep_input_config"><![CDATA[
58
c2642e6cb30e Uploaded
greg
parents: 57
diff changeset
124 #import os
50
14ac679f3c4c Uploaded
greg
parents: 49
diff changeset
125 #if $input_type_cond.cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor == "extract":
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
126 #set $cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
127 #set $input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
128 #for $i in $cell_type_epigenetic_factor_cond.input:
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
129 #set $file_name_with_ext = $os.path.basename($i)
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
130 #set $file_name = $file_name_with_ext.split(".")[0]
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
131 #if $input_name_positions == "cell_first":
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
132 #set $cell_type_name = $file_name.split("-")[0]
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
133 #set $epigenetic_factor_name = $file_name.split("-")[1]
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
134 #else:
61
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
135 #set $cell_type_name = $file_name.split("-")[1]
d89f1a065d5c Uploaded
greg
parents: 60
diff changeset
136 #set $epigenetic_factor_name = $file_name.split("-")[0]
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
137 #end if
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
138 ${cell_type_name} ${epigenetic_factor_name} ${i}
59
9fbd77da3043 Uploaded
greg
parents: 58
diff changeset
139 #end for
50
14ac679f3c4c Uploaded
greg
parents: 49
diff changeset
140 #end if]]></configfile>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
141 </configfiles>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
142 <inputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
143 <conditional name="input_type_cond">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
144 <param name="input_type" type="select" label="Select input type">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
145 <option value="datasets" selected="true">Bam, BigWig files</option>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
146 <option value="data_matrix">Data matrix</option>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
147 </param>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
148 <when value="datasets">
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
149 <conditional name="cell_type_epigenetic_factor_cond">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
150 <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
151 <option value="extract" selected="true">extracting them from the selected input file names</option>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
152 <option value="manual">manually setting them for each selected input</option>
32
58f5b2af9473 Uploaded
greg
parents: 31
diff changeset
153 </param>
48
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
154 <when value="extract">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
155 <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig file">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
156 <validator type="empty_field"/>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
157 <validator type="unspecified_build"/>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
158 </param>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
159 <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
160 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
161 <option value="cell_last">Epigenetic factor name - Cell type name</option>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
162 </param>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
163 </when>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
164 <when value="manual">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
165 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
166 <param name="cell_type_name" type="text" value="" label="Cell type name">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
167 <validator type="empty_field"/>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
168 </param>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
169 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
170 <validator type="empty_field"/>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
171 </param>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
172 <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
173 <validator type="empty_field"/>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
174 <validator type="unspecified_build"/>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
175 </param>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
176 </repeat>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
177 </when>
c9c90bfbeb20 Uploaded
greg
parents: 47
diff changeset
178 </conditional>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
179 <conditional name="specify_genomic_window_cond">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
180 <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
181 <option value="no" selected="true">No</option>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
182 <option value="yes">Yes</option>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
183 </param>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
184 <when value="no">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
185 <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
186 <conditional name="restrict_chromosomes_cond">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
187 <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
188 <option value="no" selected="true">No</option>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
189 <option value="yes">Yes</option>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
190 </param>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
191 <when value="no"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
192 <when value="yes">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
193 <repeat name="chrom_repeat" title="Chromosomes" min="1">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
194 <param name="chrom" type="text" value="" label="Chromosome"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
195 </repeat>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
196 </when>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
197 </conditional>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
198 </when>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
199 <when value="yes">
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
200 <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
201 </when>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
202 </conditional>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
203 </when>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
204 <when value="data_matrix"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
205 </conditional>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
206 <param argument="-bychr" type="boolean" truevalue="-bychr" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
36
859687afe7bc Uploaded
greg
parents: 35
diff changeset
207 <param name="reads_per_bp" type="select" display="radio" label="Calculate the average signal in each genomic window using">
859687afe7bc Uploaded
greg
parents: 35
diff changeset
208 <option value="6" selected="true">mean</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
209 <option value="8">max</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
210 </param>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
211 <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
212 <param argument="-norm" type="boolean" truevalue="-norm" falsevalue="" checked="False" label="Standardize all datasets"/>
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
213 <param argument="-hp" type="boolean" truevalue="-hp" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
47
3a4697f71a05 Uploaded
greg
parents: 46
diff changeset
214 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero value has no affect"/>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
215 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero value has no affect"/>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
216 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Zero value has no affect"/>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
217 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero value has no affect"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
218 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero value has no affect"/>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
219 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default value: sqrt(number of cell types)"/>
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
220 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
221 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
222 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default value: 0.5"/>
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
223 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero value results in the default value: 1000000"/>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
224 </inputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
225 <outputs>
28
91dc2a532890 Uploaded
greg
parents: 27
diff changeset
226 <data name="output_log" format="txt" label="${tool.name} (ideas output log) on ${on_string}"/>
26
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
227 <data name="output_cluster" format="txt" label="${tool.name} (local cell type clustering) on ${on_string}"/>
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
228 <data name="output_para" format="tabular" label="${tool.name} (epigenetic state frequency, mean and variance parameters) on ${on_string}"/>
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
229 <data name="output_profile" format="txt" label="${tool.name} (profile) on ${on_string}"/>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
230 <data name="output_state" format="txt" label="${tool.name} (epigenetic states and position classes) on ${on_string}"/>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
231 </outputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
232 <tests>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
233 </tests>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
234 <help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
235 **What it does**
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
236
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
237 Employs the IDEAS (Integrative and Discriminative Epigenome Annotation System) method for jointly and quantitatively characterizing
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
238 multivariate epigenetic landscapes in many cell types, tissues or conditions. The method accounts for position dependent epigenetic
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
239 events and detects local cell type relationships, which not only help to improve the accuracy of annotating functional classes of DNA
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
240 sequences, but also reveal cell type constitutive and specific loci. The method utilizes Bayesian non-parametric techniques to automatically
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
241 identify the best model size fitting to the data so users do not have to specify the number of states. On the other hand, users can
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
242 still specify the number of states if desired.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
243
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
244 -----
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
245
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
246 **Required options**
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
247
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
248 * **Cell type, Epigenetic factor and Input** - specify any number of inputs with currently supported formats, either bam or bigwig. The cell name + factor name must be unique for each input. For example, if you have replicate data you may want to specify the cell name as "cell_rep1", "cell_rep2", etc and the factor name as "factor_rep1", "factor_rep2", etc.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
249
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
250 * **Cell type name** - cell type name
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
251 * **Epigenetic factor name** - epigenetic factor name
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
252 * **BAM or BigWig file** - BAM or BigWig file
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
253
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
254 * **Set genomic windows on which to process the data** - if "No" is selected, IDEAS will run whole genome segmentation. If "Yes" is selected, IDEAS will segment genomes in the unit of the windows defined by the bed file. This file can be in BED3, BED4 or BED5 format, but only the first three columns (chr posst posed) will be used.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
255
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
256 * **Window size in base pairs** - Window size in base pairs (if "No" is selected)
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
257 * **Restrict processing to specified chromosomes** - If "Yes" is selected, processing will be restricted to specified chromosomes
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
258
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
259 * **Chromosomes** - processing will be restricted to specified chromosomes (if "Yes" is selected)
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
260
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
261 * **Chromosome** - specified chromosome
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
262
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
263 * **Bed file specifying the genomic windows** - bed file specifying the genomic windows (if "Yes" is selected)
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
264
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
265 **Other options**
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
266
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
267 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
36
859687afe7bc Uploaded
greg
parents: 35
diff changeset
268 * **Calculate the average signal in each genomic window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate average signal (number of reads per bp) in each genomic window.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
269 * **Select file(s) containing regions to exclude** - select one or more bed files that contain regions you'd like excluded from your datasets.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
270 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
271
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
272 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
273 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number less than 1. For example, if your input data is mean read count per window, using 0.1 may produce better results.
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
274 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
275 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
276 * **Maximum number of position classes to be inferred** - Set this value only if:
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
277
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
278 * you do not want position classes (e.g., for testing purposes), in this case set the value to 1
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
279 * IDEAS runs slowly because there are too many position classes; generally, fewer than 100 position classes will run fine
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
280
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
281 * **Maximum number of cell type clusters allowed** - Set this value only for testing. If you set the value to 1, then all cell types will be clustered in one group.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
282 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
283 * **Set the number of burnin and maximization steps** - specify the number of burnin and maximization steps; the default is 50 50. Increasing these two numbers will increase computing time and only slightly increase accuracy. Decreasing these two numbers will reduce computing time but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
284 * **Minimum standard deviation for the emission Gaussian distribution** - you should change the default value of 0.5 if the standard deviation of your data is much smaller or much larger than 1. The first line of the output produced by IDEAS is **ysd=xxx**, which is the total standard deviation of your data. If that value is less than 0.5, you may set the minimum standard deviation to an even smaller number (e.g., xxx/2). If the standard deviation of your data is much greater than 1, (e.g., 20), you may set the minimum standard deviation to a larger value, (e.g., 5). Modifying the minimum standard deviation in the former case is more necessary than in the latter case because otherwise you may end up finding no interesting segmentations. We do not recommend setting the minimum standard deviation to be 0 or smaller, as doing so may capture some artificial and uninteresting states due to tightly clustered data, such as 0 in read counts.
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
285 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
286
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
287 </help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
288 <citations>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
289 <citation type="doi">10.1093/nar/gkw278</citation>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
290 </citations>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
291 </tool>