changeset 90:029e18c3c17b draft

Uploaded
author greg
date Tue, 29 Aug 2017 09:46:32 -0400
parents d6ab97fb8aca
children 6f8af8e816d0
files build_matrix.R ideas.xml test-data/e001-h3k4me3.bigwig test-data/genomic_windows.bed test-data/output_cluster.txt test-data/output_para.tabular test-data/output_profile.txt test-data/output_state.txt
diffstat 8 files changed, 216 insertions(+), 136 deletions(-) [+]
line wrap: on
line diff
--- a/build_matrix.R	Fri Aug 25 12:22:59 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-option_list <- list(
-    make_option(c("-i", "--input"), action="store", dest="input", help="Input .bed.gz file produced by prepMat"),
-    make_option(c("-o", "--output"), action="store", dest="output", help="Output file"),
-    make_option(c("-w", "--work_dir"), action="store", dest="work_dir", help="Working directory")
-)
-
-parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
-args <- parse_args(parser, positional_arguments=TRUE)
-opt <- args$options
-
-data_table <- read.table(opt$input)
-as.matrix(data_table)
-status <- match(data_table[,3], missing)
-data_table[,3] <- paste(opt$work_dir, data_table[,1], ".", data_table[,2], ".bed.gz", sep="")
-data_table <- data_table[is.na(status)==TRUE,]
-write.table(array(data_table, dim=c(length(data_table)/3, 3)), file=opt$output, quote=FALSE, row.names=FALSE, col.names=FALSE)
--- a/ideas.xml	Fri Aug 25 12:22:59 2017 -0400
+++ b/ideas.xml	Tue Aug 29 09:46:32 2017 -0400
@@ -15,29 +15,23 @@
 ##############################################
 ## Create the config file and prepare the data
 ##############################################
-#set input_type = $input_type_cond.input_type
-#if str($input_type) == "datasets":
-    #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond
-    #set cell_type_epigenetic_factor = $cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor
-    #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond
-    #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
-    cp '$gen_prep_input_config' $prep_input_config &&
-    prepMat
-    $prep_input_config
-    #if str($specify_genomic_window) == "yes":
-        -bed '$specify_genomic_window_cond.bed_input'
-    #else:
-        -gsz '$chromInfo'
-        -wsz $specify_genomic_window_cond.window_size
-        #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
-        #if str($restrict_chromosomes) == "yes":
-            #set chroms = []
-            #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
-            #for $i in $chrom_repeat.chrom
-                $chroms.append($i)
-            #end for
-            -chr ",".join(chroms)
-        #end if
+#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
+cp '$gen_prep_input_config' $prep_input_config &&
+prepMat
+$prep_input_config
+#if str($specify_genomic_window) == "yes":
+    -bed '$specify_genomic_window_cond.bed_input'
+#else:
+    -gsz '$chromInfo'
+    -wsz $specify_genomic_window_cond.window_size
+    #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
+    #if str($restrict_chromosomes) == "yes":
+        #set chroms = []
+        #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
+        #for $i in $chrom_repeat.chrom
+            $chroms.append($i)
+        #end for
+        -chr ",".join(chroms)
     #end if
 #end if
 $bychr
@@ -48,31 +42,19 @@
 $norm
 ##############################################
 ## Coerce the prepMat config output to the
-## format expected by the R matrix builder.
+## format expected by IDEAS.
 ##############################################
 && cut -d' ' $prep_input_config -f1,2 > file1.txt
 && ls tmp/*.bed.gz > file2.txt
-&& paste <(cat file1.txt) <(cat file2.txt) > $prep_output_config
-##############################################
-## Build the R matrix from the prepMat output
-##############################################
-##&& Rscript '$__tool_directory__/build_matrix.R'
-##-i $tmp_dir/*.bed.gz
-##-o $ideas_matrix_input_file
-##-w $ideas_input_dir
+&& paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
 ##############################################
 ## Run IDEAS
 ##############################################
 && ideas
 '$prep_output_config'
-#if str($input_type) == "datasets":
-    #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond
-    #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
-    #if str($specify_genomic_window) == "yes":
-        '$specify_genomic_window_cond.bed_input'
-    #else:
-        $tmp_dir/*.bed
-    #end if
+#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
+#if str($specify_genomic_window) == "yes":
+    '$specify_genomic_window_cond.bed_input'
 #else:
     $tmp_dir/*.bed
 #end if
@@ -115,93 +97,81 @@
     ]]></command>
     <configfiles>
         <configfile name="gen_prep_input_config"><![CDATA[#import os
-#if str($input_type_cond.input_type) == "datasets":
-    #if str($input_type_cond.cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
-        #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond
-        #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
-        #for $i in $cell_type_epigenetic_factor_cond.input:
-            #set file_name_with_ext = $i.name
-            #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
-            #set file_name = $file_name_with_ext.split(".")[0]
-            #if str($input_name_positions) == "cell_first":
-                #set cell_type_name = $file_name.split("-")[0]
-                #set epigenetic_factor_name = $file_name.split("-")[1]
-            #else:
-                #set cell_type_name = $file_name.split("-")[1]
-                #set epigenetic_factor_name = $file_name.split("-")[0]
-            #end if
+#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
+    #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
+    #for $i in $cell_type_epigenetic_factor_cond.input:
+        #set file_name_with_ext = $i.name
+        #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
+        #set file_name = $file_name_with_ext.split(".")[0]
+        #if str($input_name_positions) == "cell_first":
+            #set cell_type_name = $file_name.split("-")[0]
+            #set epigenetic_factor_name = $file_name.split("-")[1]
+        #else:
+            #set cell_type_name = $file_name.split("-")[1]
+            #set epigenetic_factor_name = $file_name.split("-")[0]
+        #end if
 ${cell_type_name} ${epigenetic_factor_name} ${i}
-        #end for
-    #else:
-        #for $input_items in $input_type_cond.cell_type_epigenetic_factor_cond.input_repeat:
+    #end for
+#else:
+    #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
-        #end for
-    #end if
+    #end for
 #end if]]></configfile>
     </configfiles>
     <inputs>
-        <conditional name="input_type_cond">
-            <param name="input_type" type="select" label="Select input type">
-                <option value="datasets" selected="true">Bam, BigWig files</option>
-                <option value="data_matrix">Data matrix</option>
+        <conditional name="cell_type_epigenetic_factor_cond">
+            <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
+                <option value="extract" selected="true">extracting them from the selected input file names</option>
+                <option value="manual">manually setting them for each selected input</option>
             </param>
-            <when value="datasets">
-                <conditional name="cell_type_epigenetic_factor_cond">
-                    <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
-                        <option value="extract" selected="true">extracting them from the selected input file names</option>
-                        <option value="manual">manually setting them for each selected input</option>
+            <when value="extract">
+                <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
+                    <validator type="empty_field"/>
+                    <validator type="unspecified_build"/>
+                </param>
+                <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
+                    <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
+                    <option value="cell_last">Epigenetic factor name - Cell type name</option>
+                </param>
+            </when>
+            <when value="manual">
+                <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
+                    <param name="cell_type_name" type="text" value="" label="Cell type name">
+                        <validator type="empty_field"/>
+                    </param>
+                    <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
+                        <validator type="empty_field"/>
                     </param>
-                    <when value="extract">
-                        <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
-                            <validator type="empty_field"/>
-                            <validator type="unspecified_build"/>
-                        </param>
-                        <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
-                            <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
-                            <option value="cell_last">Epigenetic factor name - Cell type name</option>
-                        </param>
-                    </when>
-                    <when value="manual">
-                        <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
-                            <param name="cell_type_name" type="text" value="" label="Cell type name">
-                                <validator type="empty_field"/>
-                            </param>
-                            <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
-                                <validator type="empty_field"/>
-                            </param>
-                            <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
-                                <validator type="empty_field"/>
-                                <validator type="unspecified_build"/>
-                            </param>
+                    <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
+                        <validator type="empty_field"/>
+                        <validator type="unspecified_build"/>
+                    </param>
+                </repeat>
+            </when>
+        </conditional>
+        <conditional name="specify_genomic_window_cond">
+            <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+                <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
+                <conditional name="restrict_chromosomes_cond">
+                    <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no"/>
+                    <when value="yes">
+                        <repeat name="chrom_repeat" title="Chromosomes" min="1">
+                            <param name="chrom" type="text" value="" label="Chromosome"/>
                         </repeat>
                     </when>
                 </conditional>
-                <conditional name="specify_genomic_window_cond">
-                    <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="no">
-                        <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
-                        <conditional name="restrict_chromosomes_cond">
-                            <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
-                                <option value="no" selected="true">No</option>
-                                <option value="yes">Yes</option>
-                            </param>
-                            <when value="no"/>
-                            <when value="yes">
-                                <repeat name="chrom_repeat" title="Chromosomes" min="1">
-                                    <param name="chrom" type="text" value="" label="Chromosome"/>
-                                </repeat>
-                            </when>
-                        </conditional>
-                    </when>
-                    <when value="yes">
-                        <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
-                    </when>
-                </conditional>
             </when>
-            <when value="data_matrix"/>
+            <when value="yes">
+                <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
+            </when>
         </conditional>
         <param argument="-bychr" type="boolean" truevalue="-bychr" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
         <param name="reads_per_bp" type="select" display="radio" label="Calculate the average signal in each genomic window using">
@@ -236,6 +206,31 @@
         <data name="output_state" format="txt" label="${tool.name} (epigenetic states and position classes) on ${on_string}"/>
     </outputs>
     <tests>
+        <test>
+            <param name="cell_type_epigenetic_factor" value="extract"/>
+            <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
+            <param name="input_name_positions" value="cell_first"/>
+            <param name="specify_genomic_window" value="yes"/>
+            <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
+            <output name="output_state" file="output_state.txt" ftype="txt"/>
+            <output name="output_profile" file="output_profile.txt" ftype="txt"/>
+            <output name="output_para" file="output_para.tabular" ftype="tabular"/>
+            <output name="output_cluster" file="output_cluster.txt" ftype="txt"/>
+        </test>
+        <test>
+            <param name="cell_type_epigenetic_factor" value="manual"/>
+            <repeat name="input_repeat">
+                <param name="cell_type_name" value="e001" />
+                <param name="epigenetic_factor_name" value="h3k4me3"/>
+                <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
+            </repeat>
+            <param name="specify_genomic_window" value="yes"/>
+            <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
+            <output name="output_state" file="output_state.txt" ftype="txt"/>
+            <output name="output_profile" file="output_profile.txt" ftype="txt"/>
+            <output name="output_para" file="output_para.tabular" ftype="tabular"/>
+            <output name="output_cluster" file="output_cluster.txt" ftype="txt"/>
+        </test>
     </tests>
     <help>
 **What it does**
Binary file test-data/e001-h3k4me3.bigwig has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genomic_windows.bed	Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,50 @@
+chr1 21819600 21819800 R100001
+chr1 21819800 21820000 R100002
+chr1 21820000 21820200 R100003
+chr1 21820200 21820400 R100004
+chr1 21820400 21820600 R100005
+chr1 21820600 21820800 R100006
+chr1 21820800 21821000 R100007
+chr1 21821000 21821200 R100008
+chr1 21821200 21821400 R100009
+chr1 21821400 21821600 R100010
+chr1 21821600 21821800 R100011
+chr1 21821800 21822000 R100012
+chr1 21822000 21822200 R100013
+chr1 21822200 21822400 R100014
+chr1 21822400 21822600 R100015
+chr1 21822600 21822800 R100016
+chr1 21822800 21823000 R100017
+chr1 21823000 21823200 R100018
+chr1 21823200 21823400 R100019
+chr1 21823400 21823600 R100020
+chr1 21823600 21823800 R100021
+chr1 21823800 21824000 R100022
+chr1 21824000 21824200 R100023
+chr1 21824200 21824400 R100024
+chr1 21824400 21824600 R100025
+chr1 21824600 21824800 R100026
+chr1 21824800 21825000 R100027
+chr1 21825000 21825200 R100028
+chr1 21825200 21825400 R100029
+chr1 21825400 21825600 R100030
+chr1 21825600 21825800 R100031
+chr1 21825800 21826000 R100032
+chr1 21826000 21826200 R100033
+chr1 21826200 21826400 R100034
+chr1 21826400 21826600 R100035
+chr1 21826600 21826800 R100036
+chr1 21826800 21827000 R100037
+chr1 21827000 21827200 R100038
+chr1 21827200 21827400 R100039
+chr1 21827400 21827600 R100040
+chr1 21827600 21827800 R100041
+chr1 21827800 21828000 R100042
+chr1 21828000 21828200 R100043
+chr1 21828200 21828400 R100044
+chr1 21828400 21828600 R100045
+chr1 21829000 21829200 R100046
+chr1 21829400 21829600 R100047
+chr1 21829600 21829800 R100048
+chr1 21829800 21830000 R100049
+chr1 21830000 21830200 R100050
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_cluster.txt	Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,1 @@
+id0 0:0 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_para.tabular	Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,2 @@
+#count	h3k4me3	h3k4me3*h3k4me3	
+60.000000	0.000000	10.000000	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_profile.txt	Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,1 @@
+50 0.943136 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_state.txt	Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,51 @@
+#ID CHR POSst POSed e001 PosClass
+R100001 chr1 21819600 21819800 0 0
+R100002 chr1 21819800 21820000 0 0
+R100003 chr1 21820000 21820200 0 0
+R100004 chr1 21820200 21820400 0 0
+R100005 chr1 21820400 21820600 0 0
+R100006 chr1 21820600 21820800 0 0
+R100007 chr1 21820800 21821000 0 0
+R100008 chr1 21821000 21821200 0 0
+R100009 chr1 21821200 21821400 0 0
+R100010 chr1 21821400 21821600 0 0
+R100011 chr1 21821600 21821800 0 0
+R100012 chr1 21821800 21822000 0 0
+R100013 chr1 21822000 21822200 0 0
+R100014 chr1 21822200 21822400 0 0
+R100015 chr1 21822400 21822600 0 0
+R100016 chr1 21822600 21822800 0 0
+R100017 chr1 21822800 21823000 0 0
+R100018 chr1 21823000 21823200 0 0
+R100019 chr1 21823200 21823400 0 0
+R100020 chr1 21823400 21823600 0 0
+R100021 chr1 21823600 21823800 0 0
+R100022 chr1 21823800 21824000 0 0
+R100023 chr1 21824000 21824200 0 0
+R100024 chr1 21824200 21824400 0 0
+R100025 chr1 21824400 21824600 0 0
+R100026 chr1 21824600 21824800 0 0
+R100027 chr1 21824800 21825000 0 0
+R100028 chr1 21825000 21825200 0 0
+R100029 chr1 21825200 21825400 0 0
+R100030 chr1 21825400 21825600 0 0
+R100031 chr1 21825600 21825800 0 0
+R100032 chr1 21825800 21826000 0 0
+R100033 chr1 21826000 21826200 0 0
+R100034 chr1 21826200 21826400 0 0
+R100035 chr1 21826400 21826600 0 0
+R100036 chr1 21826600 21826800 0 0
+R100037 chr1 21826800 21827000 0 0
+R100038 chr1 21827000 21827200 0 0
+R100039 chr1 21827200 21827400 0 0
+R100040 chr1 21827400 21827600 0 0
+R100041 chr1 21827600 21827800 0 0
+R100042 chr1 21827800 21828000 0 0
+R100043 chr1 21828000 21828200 0 0
+R100044 chr1 21828200 21828400 0 0
+R100045 chr1 21828400 21828600 0 0
+R100046 chr1 21829000 21829200 0 0
+R100047 chr1 21829400 21829600 0 0
+R100048 chr1 21829600 21829800 0 0
+R100049 chr1 21829800 21830000 0 0
+R100050 chr1 21830000 21830200 0 0