ideas: ideas.xml comparison

comparison ideas.xml @ 180:15bd502e6a0c draft

Uploaded

author	greg
date	Thu, 01 Feb 2018 12:54:06 -0500
parents	12168e1e4859
children	28a995056cd0

comparison

equal deleted inserted replaced

-:f32f15562a82
+:15bd502e6a0c
 </requirements>
 <command detect_errors="exit_code"><![CDATA[
 #import os
 #set perform_training = $perform_training_cond.perform_training
-## Set the value of chrom_bed_input since we don't symlink it.
+## Copy the input's compressed tmp directory archive.
-#if $input.metadata.chrom_bed is None:
+cp $input.metadata.tmp_archive . &&
-#set chrom_bed_input = None
+## Extract the tmp archive.
-#else:
+tar -xzf tmp.tar.gz &&
-#set chrom_bed_input = $os.path.join($input.extra_files_path, $input.metadata.chrom_bed)
-#end if
-## Link chromosome_windows.txt if not None.
-#set chromosome_windows = $input.metadata.chrom_windows
-#if chromosome_windows is not None:
-#set from_path = $os.path.join($input.extra_files_path, $chromosome_windows)
-ln -s $from_path . &&
-#end if
-## Link IDEAS_input_config.txt which will always exist.
-#set ideas_input_config = $input.metadata.input_config
-#set from_path = $os.path.join($input.extra_files_path, $ideas_input_config)
-ln -s $from_path . &&
-## Link all files in the extra_files_path's tmp directory.
-#set tmp_dir = "tmp"
-#set from_path = $os.path.join($input.extra_files_path, "tmp")
-mkdir $tmp_dir &&
-#for f in $os.listdir($from_path):
-ln -s $os.path.join($from_path, $f) $tmp_dir &&
-#end for
 ## Define and create output directories.
 #set output_pdf_dir = "output_pdf_dir"
 #set output_txt_dir = "output_txt_dir"
 #set output_training_dir = "output_training_dir"
 Rscript '$__tool_directory__/ideas.R'
 --burnin_num $burnin_num
 #if str($bychr) == "true":
 --bychr true
 #end if
---chrom_bed_input $chrom_bed_input
+#if $input.metadata.chrom_bed is not None:
---chromosome_windows $chromosome_windows
+--chrom_bed_input $input.metadata.chrom_bed
+#end if
+#if $input.metadata.chrom_windows is not None:
+--chromosome_windows $input.metadata.chrom_windows
+#end if
 #if str($hp) == "true":
 --hp true
 #end if
 #if str($initial_states) != "0":
 --initial_states $initial_states
 #end if
---input_files_path $input.extra_files_path
+--ideas_input_config $input.metadata.input_config
---ideas_input_config $ideas_input_config
 #if str($log2) != "0.0":
 --log2 $log2
 #end if
 #if str($maxerr) != "0.0":
 --maxerr $maxerr
 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name">
 <validator type="empty_field"/>
 </param>
 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
-<param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
+<param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each window using">
 <option value="6" selected="true">mean</option>
 <option value="8">max</option>
 </param>
 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/>
 <filter>perform_training_cond['perform_training'] == 'yes'</filter>
 </collection>
 </outputs>
 <tests>
 <test>
-<param name="perform_training" value="no"/>
+<param name="perform_training" value="yes"/>
-<param name="input" value="input.ideaspre" ftype="ideaspre" dbkey="hg19"/>
+<param name="training_iterations" value="3"/>
+<param name="input" value="ideas_test1/input.html" dbkey="hg19" ftype="ideaspre">
+<composite_data value='ideas_test1/chromosomes.bed' dbkey="hg19"/>
+<composite_data value='ideas_test1/chromosome_windows.txt' dbkey="hg19"/>
+<composite_data value='ideas_test1/IDEAS_input_config.txt' dbkey="hg19"/>
+<composite_data value='ideas_test1/tmp.tar.gz' dbkey="hg19"/>
+<param name="output_heatmaps" value="yes"/>
+</param>
 <param name="project_name" value="IDEAS_out"/>
-<param name="initial_states" value="2"/>
-<param name="maxerr" value="1000"/>
-<param name="output_heatmaps" value="no"/>
 <param name="save_ideas_log" value="yes"/>
-<output_collection name="output_txt_collection" type="list">
+<output_collection name="output_training_collection" type="list">
 <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.cluster" ftype="txt"/>
-<element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/>
+<element name="IDEAS_out.chr2.cluster" file="IDEAS_out.cluster" ftype="txt"/>
-<element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/>
+<element name="IDEAS_out.chr3.cluster" file="IDEAS_out.cluster" ftype="txt"/>
-<element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/>
+<element name="IDEAS_out.chr4.cluster" file="IDEAS_out.cluster" ftype="txt"/>
+<element name="IDEAS_out.chr5.cluster" file="IDEAS_out.cluster" ftype="txt"/>
+<element name="IDEAS_out.chr1.state" file="IDEAS_out.chr1.state" ftype="txt"/>
+<element name="IDEAS_out.chr2.state" file="IDEAS_out.chr2.state" ftype="txt"/>
+<element name="IDEAS_out.chr3.state" file="IDEAS_out.chr3.state" ftype="txt"/>
+<element name="IDEAS_out.chr4.state" file="IDEAS_out.chr4.state" ftype="txt"/>
+<element name="IDEAS_out.chr5.state" file="IDEAS_out.chr5.state" ftype="txt"/>
+<element name="IDEAS_out.para0" file="IDEAS_out.para0" ftype="txt"/>
+<element name="IDEAS_out.profile0" file="IDEAS_out.profile0" ftype="txt"/>
+</output_collection>
+<output_collection name="output_pdf_collection" type="list">
+<element name="IDEAS_out.state.1.pdf" file="IDEAS_out.state.1.pdf" ftype="pdf"/>
 </output_collection>
 <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" />
 </test>
 </tests>
 <help>
 IDEAS (an **I**\ ntegrative and **D**\ iscriminative **E**\ pigenome **A**\ nnotation **S**\ ystem) identifies
 de novo regulatory functions from epigenetic data in multiple cell types jointly. It is a full probabilistic
 model defined on all data, and it combines signals across both the genome and cell types to boost power. The
 underlying assumption of IDEAS is that, because all cell types share the same underlying DNA sequences,
 **functions of each DNA segment should be correlated**. Also, cell type specific regulation is locus-dependent,
-and thus IDEAS uses local epigenetic landscape to **identify de novo and local cell type clusters** without
+and so IDEAS uses the local epigenetic landscape to **identify de novo and local cell type clusters** without
 assuming or requiring a known global cell type relationship.
-IDEAS takes as input a list of epigenetic data sets (histones, chromatin accessibility, CpG methylation, TFs, etc)
+The input is a single dataset with the **IdeasPre** datatype, which is produced by the IDEAS Preprocessor tool.
-or any other whole-genome data sets (e.g., scores). Currently the supported data formats include BigWig and BAM.
-All data sets will first be mapped by IDEAS to a common genomic coordinate in a selected assembly (200bp windows
-by default, or user-provided). The user can specify regions to be considered or removed from the analysis. The
-input data may come from one cell type/condition/individual/time point (although it does not fully utilize the
-advantage of IDEAS), or from multiple cell types/conditions/individuals/time points. The same set of epigenetic
-features may not be present in all cell types, for which IDEAS will do imputation of the missing tracks if
-specified.
 .. image:: $PATH_TO_IMAGES/ideas.png
 IDEAS predicts regulatory functions, denoted by epigenetic states, at each position in each cell type by
 **combining information simultaneously learned from other cell types** at the same positions in cell types with
 * **Epigenetic factor name** - epigenetic factor name
 * **BAM or BigWig file** - BAM or BigWig file
 * **Project name** - datasets produced by IDEAS will have this base name.
 * **Seed for IDEAS model initialization** - enter an integer to be used as the seed for the IDEAS model initialization.  A zero value causes IDEAS to automatically generate a random seed, and this seed will be different for each job run.
-* **Select Bed file that defines genomic windows on which to process the data** - if "No" is selected, IDEAS will run whole genome segmentation.  If "Yes" is selected, IDEAS will segment genomes in the unit of the windows defined by the bed file.  This file can be in BED3, BED4 or BED5 format, but only the first three columns (chr posst posed) will be used.
-* **Window size in base pairs** - Window size in base pairs (if "No" is selected)
-* **Restrict processing to specified chromosomes** - If "Yes" is selected, processing will be restricted to specified chromosomes
-* **Chromosomes** - processing will be restricted to specified chromosomes (if "Yes" is selected)
-* **Bed file specifying the genomic windows** - bed file specifying the genomic windows (if "Yes" is selected)
 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
-* **Calculate the signal in each genomic window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each genomic window.
+* **Calculate the signal in each window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each window.
-* **Select file(s) containing regions to exclude** - select one or more bed files that contains regions you'd like excluded from your datasets.
 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, log p-values or fold changes.
 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at the risk of reducing precision.
 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number that is representative of the noise level in your data (e.g., a number less than 1). If this number is on a similar scale to, or larger than, the signal in your data, the analysis will lose power. For example, if your input data is mean read count per window, using 0.1 may produce better results.
 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified.
 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.

Mercurial > repos > greg > ideas

comparison ideas.xml @ 180:15bd502e6a0c draft