comparison ideas_preprocessor.xml @ 16:aaf64c0d7a0e draft

Uploaded
author greg
date Tue, 30 Jan 2018 09:34:17 -0500
parents 4d542da396a7
children 6ff92012abb7
comparison
equal deleted inserted replaced
15:ce2021cd68d2 16:aaf64c0d7a0e
1 <tool id="ideas_preprocessor" name="IDEAS preprocessor" version="1.0.0"> 1 <tool id="ideas_preprocessor" name="IDEAS Preprocessor" version="1.0.0">
2 <description></description> 2 <description></description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.5.4">deeptools</requirement> 4 <requirement type="package" version="2.5.4">deeptools</requirement>
5 <requirement type="package" version="1.10.4">r-data.table</requirement> 5 <requirement type="package" version="1.10.4">r-data.table</requirement>
6 <requirement type="package" version="1.4.4">r-optparse</requirement> 6 <requirement type="package" version="1.4.4">r-optparse</requirement>
125 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option> 125 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
126 <option value="cell_last">Epigenetic factor name - Cell type name</option> 126 <option value="cell_last">Epigenetic factor name - Cell type name</option>
127 </param> 127 </param>
128 </when> 128 </when>
129 <when value="manual"> 129 <when value="manual">
130 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1"> 130 <repeat name="input_repeat" title="Cell type, epigenetic factor and input" min="1">
131 <param name="cell_type_name" type="text" value="" label="Cell type name"> 131 <param name="cell_type_name" type="text" value="" label="Cell type name">
132 <validator type="empty_field"/> 132 <validator type="empty_field"/>
133 </param> 133 </param>
134 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name"> 134 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
135 <validator type="empty_field"/> 135 <validator type="empty_field"/>
178 <outputs> 178 <outputs>
179 <data name="output" format="ideaspre"/> 179 <data name="output" format="ideaspre"/>
180 </outputs> 180 </outputs>
181 <tests> 181 <tests>
182 <test> 182 <test>
183 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
184 <param name="specify_chrom_windows" value="yes"/>
185 <param name="chrom_bed_input" value="chrom_windows.bed" ftype="bed" dbkey="hg19"/>
186 <output name="output" file="output.ideaspre" ftype="ideaspre" />
183 </test> 187 </test>
184 </tests> 188 </tests>
185 <help> 189 <help>
186 **What it does** 190 **What it does**
187 191
192 Takes as input a list of epigenetic data sets (histones, chromatin accessibility, CpG methylation, TFs, etc.)
193 or any other whole-genome data sets (e.g., scores). Currently the supported data formats are BigWig and BAM.
194 All data sets are mapped to a common genomic coordinate in a selected assembly (user-provided window size
195 or 200bp windows by default). The user can specify regions to be considered or removed from the analysis.
196 The input data may come from one cell type/condition/individual/time point (although this approach does not
197 fully utilize the advantages of IDEAS), or from multiple cell types/conditions/individuals/time points. The
198 same set of epigenetic features may not be present in all cell types, in which case IDEAS performs imputation
199 of the missing tracks if specified. This tool produces a single dataset with the **IdeasPre** datatype for
200 use as input to the IDEAS tool.
201
188 ----- 202 -----
189 203
190 **Required options** 204 **Required options**
205
206 * **Set cell type and epigenetic factor names by** - cell type and epigenetic factor names can be set manually or by extracting them from the names of the selected input datasets. The latter case requires all selected datasets to have names that contain a "-" character.
207
208 * **BAM or BigWig files** - select one or more BAM or BigWig files from your history, making sure that the name of every selected input includes a "-" character (e.g., e001-h3k4me3.bigwig).
209 * **Cell type, epigenetic factor and input** - manually select any number of inputs, setting the cell type and epigenetic factor name for each. The combination of "cell type name" and "epigenetic factor name" must be unique for each input. For example, if you have replicate data you may want to specify the cell name as "rep1", "rep2", etc., and the factor name as "rep1", "rep2", etc.
210
211 * **Cell type name** - cell type name if specifying manually.
212 * **Epigenetic factor name** - epigenetic factor name if specifying manually.
213 * **BAM or BigWig file** - BAM or BigWig file.
214 * **Selected input file name pattern is** - select the file name pattern, either **epigenetic factor name-cell type name** or **cell type name-epigenetic factor name**.
215
216 * **Define chromosome window positions from a bed file** - select "No" to run whole genome segmentation or select "Yes" to segment genomes within the unit of the windows defined by the bed file. This file can be in BED3, BED4 or BED5 format, but only the first three columns (chr posst posed, i.e., chromosome, start position and end position) will be used.
217
218 * **Window size in base pairs** - Window size in base pairs if specifying manually.
219 * **Restrict processing to specified chromosomes** - select "Yes" to restrict processing to specified chromosomes.
220
221 * **Chromosomes** - enter a comma-separated list of chromosomes for processing.
222
223 * **Select bed file for defining chromosome window positions** - select a bed file for specifying the chromosome window positions.
224
225 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
226 * **Calculate the signal in each window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each window.
227 * **Select bed file(s) containing regions to exclude** - select one or more bed files that contain regions you'd like excluded from your datasets.
228 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
191 229
192 </help> 230 </help>
193 <citations> 231 <citations>
194 <citation type="doi">10.1093/nar/gkw278</citation> 232 <citation type="doi">10.1093/nar/gkw278</citation>
195 </citations> 233 </citations>