# HG changeset patch
# User greg
# Date 1504014392 14400
# Node ID 029e18c3c17b3ecae3e144bff8738c7b4c3b1601
# Parent d6ab97fb8aca31e8100cfefb17ef6fa300d763f6
Uploaded
diff -r d6ab97fb8aca -r 029e18c3c17b build_matrix.R
--- a/build_matrix.R Fri Aug 25 12:22:59 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(library("optparse"))
-
-option_list <- list(
- make_option(c("-i", "--input"), action="store", dest="input", help="Input .bed.gz file produced by prepMat"),
- make_option(c("-o", "--output"), action="store", dest="output", help="Output file"),
- make_option(c("-w", "--work_dir"), action="store", dest="work_dir", help="Working directory")
-)
-
-parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
-args <- parse_args(parser, positional_arguments=TRUE)
-opt <- args$options
-
-data_table <- read.table(opt$input)
-as.matrix(data_table)
-status <- match(data_table[,3], missing)
-data_table[,3] <- paste(opt$work_dir, data_table[,1], ".", data_table[,2], ".bed.gz", sep="")
-data_table <- data_table[is.na(status)==TRUE,]
-write.table(array(data_table, dim=c(length(data_table)/3, 3)), file=opt$output, quote=FALSE, row.names=FALSE, col.names=FALSE)
diff -r d6ab97fb8aca -r 029e18c3c17b ideas.xml
--- a/ideas.xml Fri Aug 25 12:22:59 2017 -0400
+++ b/ideas.xml Tue Aug 29 09:46:32 2017 -0400
@@ -15,29 +15,23 @@
##############################################
## Create the config file and prepare the data
##############################################
-#set input_type = $input_type_cond.input_type
-#if str($input_type) == "datasets":
- #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond
- #set cell_type_epigenetic_factor = $cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor
- #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond
- #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
- cp '$gen_prep_input_config' $prep_input_config &&
- prepMat
- $prep_input_config
- #if str($specify_genomic_window) == "yes":
- -bed '$specify_genomic_window_cond.bed_input'
- #else:
- -gsz '$chromInfo'
- -wsz $specify_genomic_window_cond.window_size
- #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
- #if str($restrict_chromosomes) == "yes":
- #set chroms = []
- #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
- #for $i in $chrom_repeat.chrom
- $chroms.append($i)
- #end for
- -chr ",".join(chroms)
- #end if
+#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
+cp '$gen_prep_input_config' $prep_input_config &&
+prepMat
+$prep_input_config
+#if str($specify_genomic_window) == "yes":
+ -bed '$specify_genomic_window_cond.bed_input'
+#else:
+ -gsz '$chromInfo'
+ -wsz $specify_genomic_window_cond.window_size
+ #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
+ #if str($restrict_chromosomes) == "yes":
+ #set chroms = []
+ #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
+ #for $i in $chrom_repeat.chrom
+ $chroms.append($i)
+ #end for
+ -chr ",".join(chroms)
#end if
#end if
$bychr
@@ -48,31 +42,19 @@
$norm
##############################################
## Coerce the prepMat config output to the
-## format expected by the R matrix builder.
+## format expected by IDEAS.
##############################################
&& cut -d' ' $prep_input_config -f1,2 > file1.txt
&& ls tmp/*.bed.gz > file2.txt
-&& paste <(cat file1.txt) <(cat file2.txt) > $prep_output_config
-##############################################
-## Build the R matrix from the prepMat output
-##############################################
-##&& Rscript '$__tool_directory__/build_matrix.R'
-##-i $tmp_dir/*.bed.gz
-##-o $ideas_matrix_input_file
-##-w $ideas_input_dir
+&& paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
##############################################
## Run IDEAS
##############################################
&& ideas
'$prep_output_config'
-#if str($input_type) == "datasets":
- #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond
- #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
- #if str($specify_genomic_window) == "yes":
- '$specify_genomic_window_cond.bed_input'
- #else:
- $tmp_dir/*.bed
- #end if
+#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
+#if str($specify_genomic_window) == "yes":
+ '$specify_genomic_window_cond.bed_input'
#else:
$tmp_dir/*.bed
#end if
@@ -115,93 +97,81 @@
]]>
= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
- #set file_name = $file_name_with_ext.split(".")[0]
- #if str($input_name_positions) == "cell_first":
- #set cell_type_name = $file_name.split("-")[0]
- #set epigenetic_factor_name = $file_name.split("-")[1]
- #else:
- #set cell_type_name = $file_name.split("-")[1]
- #set epigenetic_factor_name = $file_name.split("-")[0]
- #end if
+#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
+ #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
+ #for $i in $cell_type_epigenetic_factor_cond.input:
+ #set file_name_with_ext = $i.name
+ #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
+ #set file_name = $file_name_with_ext.split(".")[0]
+ #if str($input_name_positions) == "cell_first":
+ #set cell_type_name = $file_name.split("-")[0]
+ #set epigenetic_factor_name = $file_name.split("-")[1]
+ #else:
+ #set cell_type_name = $file_name.split("-")[1]
+ #set epigenetic_factor_name = $file_name.split("-")[0]
+ #end if
${cell_type_name} ${epigenetic_factor_name} ${i}
- #end for
- #else:
- #for $input_items in $input_type_cond.cell_type_epigenetic_factor_cond.input_repeat:
+ #end for
+#else:
+ #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
- #end for
- #end if
+ #end for
#end if]]>
-
-
-
-
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
@@ -236,6 +206,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
**What it does**
diff -r d6ab97fb8aca -r 029e18c3c17b test-data/e001-h3k4me3.bigwig
Binary file test-data/e001-h3k4me3.bigwig has changed
diff -r d6ab97fb8aca -r 029e18c3c17b test-data/genomic_windows.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genomic_windows.bed Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,50 @@
+chr1 21819600 21819800 R100001
+chr1 21819800 21820000 R100002
+chr1 21820000 21820200 R100003
+chr1 21820200 21820400 R100004
+chr1 21820400 21820600 R100005
+chr1 21820600 21820800 R100006
+chr1 21820800 21821000 R100007
+chr1 21821000 21821200 R100008
+chr1 21821200 21821400 R100009
+chr1 21821400 21821600 R100010
+chr1 21821600 21821800 R100011
+chr1 21821800 21822000 R100012
+chr1 21822000 21822200 R100013
+chr1 21822200 21822400 R100014
+chr1 21822400 21822600 R100015
+chr1 21822600 21822800 R100016
+chr1 21822800 21823000 R100017
+chr1 21823000 21823200 R100018
+chr1 21823200 21823400 R100019
+chr1 21823400 21823600 R100020
+chr1 21823600 21823800 R100021
+chr1 21823800 21824000 R100022
+chr1 21824000 21824200 R100023
+chr1 21824200 21824400 R100024
+chr1 21824400 21824600 R100025
+chr1 21824600 21824800 R100026
+chr1 21824800 21825000 R100027
+chr1 21825000 21825200 R100028
+chr1 21825200 21825400 R100029
+chr1 21825400 21825600 R100030
+chr1 21825600 21825800 R100031
+chr1 21825800 21826000 R100032
+chr1 21826000 21826200 R100033
+chr1 21826200 21826400 R100034
+chr1 21826400 21826600 R100035
+chr1 21826600 21826800 R100036
+chr1 21826800 21827000 R100037
+chr1 21827000 21827200 R100038
+chr1 21827200 21827400 R100039
+chr1 21827400 21827600 R100040
+chr1 21827600 21827800 R100041
+chr1 21827800 21828000 R100042
+chr1 21828000 21828200 R100043
+chr1 21828200 21828400 R100044
+chr1 21828400 21828600 R100045
+chr1 21829000 21829200 R100046
+chr1 21829400 21829600 R100047
+chr1 21829600 21829800 R100048
+chr1 21829800 21830000 R100049
+chr1 21830000 21830200 R100050
diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_cluster.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_cluster.txt Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,1 @@
+id0 0:0
diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_para.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_para.tabular Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,2 @@
+#count h3k4me3 h3k4me3*h3k4me3
+60.000000 0.000000 10.000000
diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_profile.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_profile.txt Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,1 @@
+50 0.943136
diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_state.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_state.txt Tue Aug 29 09:46:32 2017 -0400
@@ -0,0 +1,51 @@
+#ID CHR POSst POSed e001 PosClass
+R100001 chr1 21819600 21819800 0 0
+R100002 chr1 21819800 21820000 0 0
+R100003 chr1 21820000 21820200 0 0
+R100004 chr1 21820200 21820400 0 0
+R100005 chr1 21820400 21820600 0 0
+R100006 chr1 21820600 21820800 0 0
+R100007 chr1 21820800 21821000 0 0
+R100008 chr1 21821000 21821200 0 0
+R100009 chr1 21821200 21821400 0 0
+R100010 chr1 21821400 21821600 0 0
+R100011 chr1 21821600 21821800 0 0
+R100012 chr1 21821800 21822000 0 0
+R100013 chr1 21822000 21822200 0 0
+R100014 chr1 21822200 21822400 0 0
+R100015 chr1 21822400 21822600 0 0
+R100016 chr1 21822600 21822800 0 0
+R100017 chr1 21822800 21823000 0 0
+R100018 chr1 21823000 21823200 0 0
+R100019 chr1 21823200 21823400 0 0
+R100020 chr1 21823400 21823600 0 0
+R100021 chr1 21823600 21823800 0 0
+R100022 chr1 21823800 21824000 0 0
+R100023 chr1 21824000 21824200 0 0
+R100024 chr1 21824200 21824400 0 0
+R100025 chr1 21824400 21824600 0 0
+R100026 chr1 21824600 21824800 0 0
+R100027 chr1 21824800 21825000 0 0
+R100028 chr1 21825000 21825200 0 0
+R100029 chr1 21825200 21825400 0 0
+R100030 chr1 21825400 21825600 0 0
+R100031 chr1 21825600 21825800 0 0
+R100032 chr1 21825800 21826000 0 0
+R100033 chr1 21826000 21826200 0 0
+R100034 chr1 21826200 21826400 0 0
+R100035 chr1 21826400 21826600 0 0
+R100036 chr1 21826600 21826800 0 0
+R100037 chr1 21826800 21827000 0 0
+R100038 chr1 21827000 21827200 0 0
+R100039 chr1 21827200 21827400 0 0
+R100040 chr1 21827400 21827600 0 0
+R100041 chr1 21827600 21827800 0 0
+R100042 chr1 21827800 21828000 0 0
+R100043 chr1 21828000 21828200 0 0
+R100044 chr1 21828200 21828400 0 0
+R100045 chr1 21828400 21828600 0 0
+R100046 chr1 21829000 21829200 0 0
+R100047 chr1 21829400 21829600 0 0
+R100048 chr1 21829600 21829800 0 0
+R100049 chr1 21829800 21830000 0 0
+R100050 chr1 21830000 21830200 0 0