Mercurial > repos > greg > ideas
changeset 169:7b0c6c6cb82b draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Jan 2018 09:30:44 -0500 |
parents | 5c5e2f7b34c8 |
children | 59ed3d424524 |
files | ideas.xml |
diffstat | 1 files changed, 37 insertions(+), 216 deletions(-) [+] |
line wrap: on
line diff
--- a/ideas.xml Fri Jan 19 13:43:30 2018 -0500 +++ b/ideas.xml Thu Jan 25 09:30:44 2018 -0500 @@ -13,114 +13,60 @@ #set output_pdf_dir = "output_pdf_dir" #set output_txt_dir = "output_txt_dir" #set output_training_dir = "output_training_dir" -#set tmp_dir = "tmp" -#set prep_input_config = "prep_input_config.txt" -#set prep_output_config = "prep_output_config.txt" -#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window #set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt" #set perform_training = $perform_training_cond.perform_training -############################################## -## Create the config file and prepare the data -############################################## -#if str($output_heatmaps) == "yes": - mkdir '$output_pdf_dir' && -#end if -#if str($perform_training) == "yes": - #set output_dir = $output_training_dir - mkdir '$output_training_dir' && -#else: - #set output_dir = $output_txt_dir - mkdir '$output_txt_dir' && -#end if -cp '$gen_prep_input_config' $prep_input_config && -sort $prep_input_config -o $prep_input_config && -prepMat -$prep_input_config -#if str($specify_genomic_window) == "yes": - -bed '$specify_genomic_window_cond.bed_input' -#else: - -gsz '$chromInfo' - -wsz $specify_genomic_window_cond.window_size - #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes - #if str($restrict_chromosomes) == "yes": - #set chroms = [] - #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat - #for $i in $chrom_repeat.chrom - $chroms.append($i) - #end for - -chr ",".join(chroms) - #end if +&& Rscript '$__tool_directory__/ideas.R' +--burnin_num $burnin_num +#if str($bychr) == "true": + --bychr true #end if -$bychr --c $reads_per_bp -#if str($blacklist_input) not in ["None", ""]: - -exclude '$blacklist_input' +--chrom_bed_input $input.metadata.chrom_bed_input +--chromosome_windows $input.metadata.chromosome_windows +#if str($hp) == "true": + --hp true #end if -$norm -&>prepmat_log.txt; -if [[ $? -ne 0 ]]; then - cp prepmat_log.txt '$output_dir'; - exit 1; -fi -############################################## -## Coerce the prepMat config output to the -## format expected by IDEAS. -############################################## -&& cut -d' ' $prep_input_config -f1,2 > file1.txt -&& ls tmp/*.bed.gz > file2.txt -&& paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config -#if str($specify_genomic_window) == "yes": - ############################################## - ## Using a genomic window bed file, so categorize - ## the window positions by chromosome to enable - ## the IDEAS -inv option. - ############################################## - && cp '$gen_windows_positions_by_chroms_config' $windows_positions_by_chroms_config +#if str($initial_states) != "0": + --initial_states $initial_states #end if -&& Rscript '$__tool_directory__/ideas.R' -#if str($specify_genomic_window) == "yes": - --windows_bed '$specify_genomic_window_cond.bed_input' - --windows_config $windows_positions_by_chroms_config -#end if -#if str($perform_training) == "yes": - --training_iterations $perform_training_cond.training_iterations - --training_windows $perform_training_cond.training_windows -#end if ---prep_output_config '$prep_output_config' ---hp $hp +--input $input +--input_files_path $input.extra_files_path +--ideas_input_config $input.metadata.ideas_input_config #if str($log2) != "0.0": --log2 $log2 #end if +#if str($maxerr) != "0.0": + --maxerr $maxerr +#end if +#if str($max_cell_type_clusters) != "0": + --max_cell_type_clusters $max_cell_type_clusters +#end if +#if str($max_position_classes) != "0": + --max_position_classes $max_position_classes +#end if #if str($max_states) != "0.0": --max_states $max_states #end if -#if str($initial_states) != "0": - --initial_states $initial_states -#end if -#if str($max_position_classes) != "0": - --max_position_classes $max_position_classes -#end if -#if str($max_cell_type_clusters) != "0": - --max_cell_type_clusters $max_cell_type_clusters +--mcmc_num $mcmc_num +#if str($minerr) != "0.0": + --minerr $minerr #end if #if str($prior_concentration) != "0.0": --prior_concentration $prior_concentration #end if ---burnin_num $burnin_num ---mcmc_num $mcmc_num -#if str($minerr) != "0.0": - --minerr $minerr -#end if -#if str($maxerr) != "0.0": - --maxerr $maxerr -#end if ---rseed $rseed ---thread \${GALAXY_SLOTS:-4} --project_name '$project_name' #if str($save_ideas_log) == "yes": --save_ideas_log $save_ideas_log --output_log '$output_log' #end if +#if str($standardize_datasets) == "true": + --standardize_datasets true +#end if +--rseed $rseed +--thread \${GALAXY_SLOTS:-4} +#if str($perform_training) == "yes": + --training_iterations $perform_training_cond.training_iterations + --training_windows $perform_training_cond.training_windows +#end if #if str($perform_training) == "yes": && mv ./*.para0 '$output_dir' && mv ./*.profile0 '$output_dir' @@ -140,52 +86,6 @@ #end if #end if ]]></command> - <configfiles> - <configfile name="gen_prep_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract": - #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions - #for $i in $cell_type_epigenetic_factor_cond.input: - #set file_name_with_ext = $i.name - #if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0: - #set file_name_with_ext = $file_name_with_ext.split('/')[-1] - #end if - #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext - #set file_name = $file_name_with_ext.split(".")[0] - #if str($input_name_positions) == "cell_first": - #set cell_type_name = $file_name.split("-")[0] - #set epigenetic_factor_name = $file_name.split("-")[1] - #else: - #set cell_type_name = $file_name.split("-")[1] - #set epigenetic_factor_name = $file_name.split("-")[0] - #end if -${cell_type_name} ${epigenetic_factor_name} ${i} - #end for -#else: - #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat: -${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} - #end for -#end if]]></configfile> - <configfile name="gen_windows_positions_by_chroms_config"><![CDATA[#if str($specify_genomic_window_cond.specify_genomic_window) == "yes": - #import collections - #set window_positions_by_chroms_odict = $collections.OrderedDict() - #for count, line in enumerate(open($specify_genomic_window_cond.bed_input.file_name, 'r')): - #set $line = $line.strip() - #if not $line or $line.startswith('#'): - #continue - #end if - #set items = $line.split('\t') - #if $items[0] in $window_positions_by_chroms_odict: - #set tup = $window_positions_by_chroms_odict[$items[0]] - #set $tup[1] += 1 - #set $window_positions_by_chroms_odict[$items[0]] = $tup - #else: - #set $window_positions_by_chroms_odict[$items[0]] = [$count, $count+1] - #end if - #end for - #for $chrom, $tup in $window_positions_by_chroms_odict.items(): -${chrom} ${tup[0]} ${tup[1]} - #end for -#end if]]></configfile> - </configfiles> <inputs> <conditional name="perform_training_cond"> <param name="perform_training" type="select" label="Perform training?"> @@ -198,71 +98,16 @@ </when> <when value="no"/> </conditional> - <conditional name="cell_type_epigenetic_factor_cond"> - <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by"> - <option value="extract" selected="true">extracting them from the selected input file names</option> - <option value="manual">manually setting them for each selected input</option> - </param> - <when value="extract"> - <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files"> - <validator type="empty_field"/> - <validator type="unspecified_build"/> - </param> - <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names"> - <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option> - <option value="cell_last">Epigenetic factor name - Cell type name</option> - </param> - </when> - <when value="manual"> - <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1"> - <param name="cell_type_name" type="text" value="" label="Cell type name"> - <validator type="empty_field"/> - </param> - <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name"> - <validator type="empty_field"/> - </param> - <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file"> - <validator type="empty_field"/> - <validator type="unspecified_build"/> - </param> - </repeat> - </when> - </conditional> + <param name="input" type="data" format="ideaspre" label="Select IDEAS input"> <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name"> <validator type="empty_field"/> </param> <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/> - <conditional name="specify_genomic_window_cond"> - <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"> - <param name="window_size" type="integer" value="200" label="Window size in base pairs"/> - <conditional name="restrict_chromosomes_cond"> - <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"/> - <when value="yes"> - <repeat name="chrom_repeat" title="Chromosomes" min="1"> - <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/> - </repeat> - </when> - </conditional> - </when> - <when value="yes"> - <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/> - </when> - </conditional> <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/> <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using"> <option value="6" selected="true">mean</option> <option value="8">max</option> </param> - <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/> - <param name="norm" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/> <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/> <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/> <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/> @@ -270,6 +115,7 @@ <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/> <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/> <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/> + <param name="standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/> <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/> <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/> <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/> @@ -303,11 +149,7 @@ <tests> <test> <param name="perform_training" value="no"/> - <param name="cell_type_epigenetic_factor" value="extract"/> - <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/> - <param name="input_name_positions" value="cell_first"/> - <param name="specify_genomic_window" value="yes"/> - <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/> + <param name="input" value="input.ideaspre" ftype="ideaspre" dbkey="hg19"/> <param name="project_name" value="IDEAS_out"/> <param name="initial_states" value="2"/> <param name="maxerr" value="1000"/> @@ -320,27 +162,6 @@ </output_collection> <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" /> </test> - <test> - <param name="perform_training" value="no"/> - <param name="cell_type_epigenetic_factor" value="manual"/> - <repeat name="input_repeat"> - <param name="cell_type_name" value="e001" /> - <param name="epigenetic_factor_name" value="h3k4me3"/> - <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/> - </repeat> - <param name="specify_genomic_window" value="yes"/> - <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/> - <param name="project_name" value="IDEAS_out"/> - <param name="initial_states" value="2"/> - <param name="maxerr" value="1000"/> - <param name="output_heatmaps" value="no"/> - <output_collection name="output_txt_collection" type="list"> - <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.cluster" ftype="txt"/> - <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/> - <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/> - <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/> - </output_collection> - </test> </tests> <help> **What it does**