Mercurial > repos > greg > ideas
changeset 90:029e18c3c17b draft
Uploaded
author | greg |
---|---|
date | Tue, 29 Aug 2017 09:46:32 -0400 |
parents | d6ab97fb8aca |
children | 6f8af8e816d0 |
files | build_matrix.R ideas.xml test-data/e001-h3k4me3.bigwig test-data/genomic_windows.bed test-data/output_cluster.txt test-data/output_para.tabular test-data/output_profile.txt test-data/output_state.txt |
diffstat | 8 files changed, 216 insertions(+), 136 deletions(-) [+] |
line wrap: on
line diff
--- a/build_matrix.R Fri Aug 25 12:22:59 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -option_list <- list( - make_option(c("-i", "--input"), action="store", dest="input", help="Input .bed.gz file produced by prepMat"), - make_option(c("-o", "--output"), action="store", dest="output", help="Output file"), - make_option(c("-w", "--work_dir"), action="store", dest="work_dir", help="Working directory") -) - -parser <- OptionParser(usage="%prog [options] file", option_list=option_list) -args <- parse_args(parser, positional_arguments=TRUE) -opt <- args$options - -data_table <- read.table(opt$input) -as.matrix(data_table) -status <- match(data_table[,3], missing) -data_table[,3] <- paste(opt$work_dir, data_table[,1], ".", data_table[,2], ".bed.gz", sep="") -data_table <- data_table[is.na(status)==TRUE,] -write.table(array(data_table, dim=c(length(data_table)/3, 3)), file=opt$output, quote=FALSE, row.names=FALSE, col.names=FALSE)
--- a/ideas.xml Fri Aug 25 12:22:59 2017 -0400 +++ b/ideas.xml Tue Aug 29 09:46:32 2017 -0400 @@ -15,29 +15,23 @@ ############################################## ## Create the config file and prepare the data ############################################## -#set input_type = $input_type_cond.input_type -#if str($input_type) == "datasets": - #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond - #set cell_type_epigenetic_factor = $cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor - #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond - #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window - cp '$gen_prep_input_config' $prep_input_config && - prepMat - $prep_input_config - #if str($specify_genomic_window) == "yes": - -bed '$specify_genomic_window_cond.bed_input' - #else: - -gsz '$chromInfo' - -wsz $specify_genomic_window_cond.window_size - #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes - #if str($restrict_chromosomes) == "yes": - #set chroms = [] - #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat - #for $i in $chrom_repeat.chrom - $chroms.append($i) - #end for - -chr ",".join(chroms) - #end if +#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window +cp '$gen_prep_input_config' $prep_input_config && +prepMat +$prep_input_config +#if str($specify_genomic_window) == "yes": + -bed '$specify_genomic_window_cond.bed_input' +#else: + -gsz '$chromInfo' + -wsz $specify_genomic_window_cond.window_size + #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes + #if str($restrict_chromosomes) == "yes": + #set chroms = [] + #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat + #for $i in $chrom_repeat.chrom + $chroms.append($i) + #end for + -chr ",".join(chroms) #end if #end if $bychr @@ -48,31 +42,19 @@ $norm ############################################## ## Coerce the prepMat config output to the -## format expected by the R matrix builder. +## format expected by IDEAS. ############################################## && cut -d' ' $prep_input_config -f1,2 > file1.txt && ls tmp/*.bed.gz > file2.txt -&& paste <(cat file1.txt) <(cat file2.txt) > $prep_output_config -############################################## -## Build the R matrix from the prepMat output -############################################## -##&& Rscript '$__tool_directory__/build_matrix.R' -##-i $tmp_dir/*.bed.gz -##-o $ideas_matrix_input_file -##-w $ideas_input_dir +&& paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config ############################################## ## Run IDEAS ############################################## && ideas '$prep_output_config' -#if str($input_type) == "datasets": - #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond - #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window - #if str($specify_genomic_window) == "yes": - '$specify_genomic_window_cond.bed_input' - #else: - $tmp_dir/*.bed - #end if +#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window +#if str($specify_genomic_window) == "yes": + '$specify_genomic_window_cond.bed_input' #else: $tmp_dir/*.bed #end if @@ -115,93 +97,81 @@ ]]></command> <configfiles> <configfile name="gen_prep_input_config"><![CDATA[#import os -#if str($input_type_cond.input_type) == "datasets": - #if str($input_type_cond.cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract": - #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond - #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions - #for $i in $cell_type_epigenetic_factor_cond.input: - #set file_name_with_ext = $i.name - #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext - #set file_name = $file_name_with_ext.split(".")[0] - #if str($input_name_positions) == "cell_first": - #set cell_type_name = $file_name.split("-")[0] - #set epigenetic_factor_name = $file_name.split("-")[1] - #else: - #set cell_type_name = $file_name.split("-")[1] - #set epigenetic_factor_name = $file_name.split("-")[0] - #end if +#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract": + #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions + #for $i in $cell_type_epigenetic_factor_cond.input: + #set file_name_with_ext = $i.name + #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext + #set file_name = $file_name_with_ext.split(".")[0] + #if str($input_name_positions) == "cell_first": + #set cell_type_name = $file_name.split("-")[0] + #set epigenetic_factor_name = $file_name.split("-")[1] + #else: + #set cell_type_name = $file_name.split("-")[1] + #set epigenetic_factor_name = $file_name.split("-")[0] + #end if ${cell_type_name} ${epigenetic_factor_name} ${i} - #end for - #else: - #for $input_items in $input_type_cond.cell_type_epigenetic_factor_cond.input_repeat: + #end for +#else: + #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat: ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} - #end for - #end if + #end for #end if]]></configfile> </configfiles> <inputs> - <conditional name="input_type_cond"> - <param name="input_type" type="select" label="Select input type"> - <option value="datasets" selected="true">Bam, BigWig files</option> - <option value="data_matrix">Data matrix</option> + <conditional name="cell_type_epigenetic_factor_cond"> + <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by"> + <option value="extract" selected="true">extracting them from the selected input file names</option> + <option value="manual">manually setting them for each selected input</option> </param> - <when value="datasets"> - <conditional name="cell_type_epigenetic_factor_cond"> - <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by"> - <option value="extract" selected="true">extracting them from the selected input file names</option> - <option value="manual">manually setting them for each selected input</option> + <when value="extract"> + <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files"> + <validator type="empty_field"/> + <validator type="unspecified_build"/> + </param> + <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names"> + <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option> + <option value="cell_last">Epigenetic factor name - Cell type name</option> + </param> + </when> + <when value="manual"> + <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1"> + <param name="cell_type_name" type="text" value="" label="Cell type name"> + <validator type="empty_field"/> + </param> + <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name"> + <validator type="empty_field"/> </param> - <when value="extract"> - <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files"> - <validator type="empty_field"/> - <validator type="unspecified_build"/> - </param> - <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names"> - <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option> - <option value="cell_last">Epigenetic factor name - Cell type name</option> - </param> - </when> - <when value="manual"> - <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1"> - <param name="cell_type_name" type="text" value="" label="Cell type name"> - <validator type="empty_field"/> - </param> - <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name"> - <validator type="empty_field"/> - </param> - <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file"> - <validator type="empty_field"/> - <validator type="unspecified_build"/> - </param> + <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file"> + <validator type="empty_field"/> + <validator type="unspecified_build"/> + </param> + </repeat> + </when> + </conditional> + <conditional name="specify_genomic_window_cond"> + <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"> + <param name="window_size" type="integer" value="200" label="Window size in base pairs"/> + <conditional name="restrict_chromosomes_cond"> + <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <repeat name="chrom_repeat" title="Chromosomes" min="1"> + <param name="chrom" type="text" value="" label="Chromosome"/> </repeat> </when> </conditional> - <conditional name="specify_genomic_window_cond"> - <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"> - <param name="window_size" type="integer" value="200" label="Window size in base pairs"/> - <conditional name="restrict_chromosomes_cond"> - <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"/> - <when value="yes"> - <repeat name="chrom_repeat" title="Chromosomes" min="1"> - <param name="chrom" type="text" value="" label="Chromosome"/> - </repeat> - </when> - </conditional> - </when> - <when value="yes"> - <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/> - </when> - </conditional> </when> - <when value="data_matrix"/> + <when value="yes"> + <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/> + </when> </conditional> <param argument="-bychr" type="boolean" truevalue="-bychr" falsevalue="" checked="False" label="Output chromosomes in separate files"/> <param name="reads_per_bp" type="select" display="radio" label="Calculate the average signal in each genomic window using"> @@ -236,6 +206,31 @@ <data name="output_state" format="txt" label="${tool.name} (epigenetic states and position classes) on ${on_string}"/> </outputs> <tests> + <test> + <param name="cell_type_epigenetic_factor" value="extract"/> + <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/> + <param name="input_name_positions" value="cell_first"/> + <param name="specify_genomic_window" value="yes"/> + <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/> + <output name="output_state" file="output_state.txt" ftype="txt"/> + <output name="output_profile" file="output_profile.txt" ftype="txt"/> + <output name="output_para" file="output_para.tabular" ftype="tabular"/> + <output name="output_cluster" file="output_cluster.txt" ftype="txt"/> + </test> + <test> + <param name="cell_type_epigenetic_factor" value="manual"/> + <repeat name="input_repeat"> + <param name="cell_type_name" value="e001" /> + <param name="epigenetic_factor_name" value="h3k4me3"/> + <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/> + </repeat> + <param name="specify_genomic_window" value="yes"/> + <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/> + <output name="output_state" file="output_state.txt" ftype="txt"/> + <output name="output_profile" file="output_profile.txt" ftype="txt"/> + <output name="output_para" file="output_para.tabular" ftype="tabular"/> + <output name="output_cluster" file="output_cluster.txt" ftype="txt"/> + </test> </tests> <help> **What it does**
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genomic_windows.bed Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,50 @@ +chr1 21819600 21819800 R100001 +chr1 21819800 21820000 R100002 +chr1 21820000 21820200 R100003 +chr1 21820200 21820400 R100004 +chr1 21820400 21820600 R100005 +chr1 21820600 21820800 R100006 +chr1 21820800 21821000 R100007 +chr1 21821000 21821200 R100008 +chr1 21821200 21821400 R100009 +chr1 21821400 21821600 R100010 +chr1 21821600 21821800 R100011 +chr1 21821800 21822000 R100012 +chr1 21822000 21822200 R100013 +chr1 21822200 21822400 R100014 +chr1 21822400 21822600 R100015 +chr1 21822600 21822800 R100016 +chr1 21822800 21823000 R100017 +chr1 21823000 21823200 R100018 +chr1 21823200 21823400 R100019 +chr1 21823400 21823600 R100020 +chr1 21823600 21823800 R100021 +chr1 21823800 21824000 R100022 +chr1 21824000 21824200 R100023 +chr1 21824200 21824400 R100024 +chr1 21824400 21824600 R100025 +chr1 21824600 21824800 R100026 +chr1 21824800 21825000 R100027 +chr1 21825000 21825200 R100028 +chr1 21825200 21825400 R100029 +chr1 21825400 21825600 R100030 +chr1 21825600 21825800 R100031 +chr1 21825800 21826000 R100032 +chr1 21826000 21826200 R100033 +chr1 21826200 21826400 R100034 +chr1 21826400 21826600 R100035 +chr1 21826600 21826800 R100036 +chr1 21826800 21827000 R100037 +chr1 21827000 21827200 R100038 +chr1 21827200 21827400 R100039 +chr1 21827400 21827600 R100040 +chr1 21827600 21827800 R100041 +chr1 21827800 21828000 R100042 +chr1 21828000 21828200 R100043 +chr1 21828200 21828400 R100044 +chr1 21828400 21828600 R100045 +chr1 21829000 21829200 R100046 +chr1 21829400 21829600 R100047 +chr1 21829600 21829800 R100048 +chr1 21829800 21830000 R100049 +chr1 21830000 21830200 R100050
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_cluster.txt Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,1 @@ +id0 0:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_para.tabular Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,2 @@ +#count h3k4me3 h3k4me3*h3k4me3 +60.000000 0.000000 10.000000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_profile.txt Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,1 @@ +50 0.943136
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_state.txt Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,51 @@ +#ID CHR POSst POSed e001 PosClass +R100001 chr1 21819600 21819800 0 0 +R100002 chr1 21819800 21820000 0 0 +R100003 chr1 21820000 21820200 0 0 +R100004 chr1 21820200 21820400 0 0 +R100005 chr1 21820400 21820600 0 0 +R100006 chr1 21820600 21820800 0 0 +R100007 chr1 21820800 21821000 0 0 +R100008 chr1 21821000 21821200 0 0 +R100009 chr1 21821200 21821400 0 0 +R100010 chr1 21821400 21821600 0 0 +R100011 chr1 21821600 21821800 0 0 +R100012 chr1 21821800 21822000 0 0 +R100013 chr1 21822000 21822200 0 0 +R100014 chr1 21822200 21822400 0 0 +R100015 chr1 21822400 21822600 0 0 +R100016 chr1 21822600 21822800 0 0 +R100017 chr1 21822800 21823000 0 0 +R100018 chr1 21823000 21823200 0 0 +R100019 chr1 21823200 21823400 0 0 +R100020 chr1 21823400 21823600 0 0 +R100021 chr1 21823600 21823800 0 0 +R100022 chr1 21823800 21824000 0 0 +R100023 chr1 21824000 21824200 0 0 +R100024 chr1 21824200 21824400 0 0 +R100025 chr1 21824400 21824600 0 0 +R100026 chr1 21824600 21824800 0 0 +R100027 chr1 21824800 21825000 0 0 +R100028 chr1 21825000 21825200 0 0 +R100029 chr1 21825200 21825400 0 0 +R100030 chr1 21825400 21825600 0 0 +R100031 chr1 21825600 21825800 0 0 +R100032 chr1 21825800 21826000 0 0 +R100033 chr1 21826000 21826200 0 0 +R100034 chr1 21826200 21826400 0 0 +R100035 chr1 21826400 21826600 0 0 +R100036 chr1 21826600 21826800 0 0 +R100037 chr1 21826800 21827000 0 0 +R100038 chr1 21827000 21827200 0 0 +R100039 chr1 21827200 21827400 0 0 +R100040 chr1 21827400 21827600 0 0 +R100041 chr1 21827600 21827800 0 0 +R100042 chr1 21827800 21828000 0 0 +R100043 chr1 21828000 21828200 0 0 +R100044 chr1 21828200 21828400 0 0 +R100045 chr1 21828400 21828600 0 0 +R100046 chr1 21829000 21829200 0 0 +R100047 chr1 21829400 21829600 0 0 +R100048 chr1 21829600 21829800 0 0 +R100049 chr1 21829800 21830000 0 0 +R100050 chr1 21830000 21830200 0 0