# HG changeset patch # User greg # Date 1504014392 14400 # Node ID 029e18c3c17b3ecae3e144bff8738c7b4c3b1601 # Parent d6ab97fb8aca31e8100cfefb17ef6fa300d763f6 Uploaded diff -r d6ab97fb8aca -r 029e18c3c17b build_matrix.R --- a/build_matrix.R Fri Aug 25 12:22:59 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -option_list <- list( - make_option(c("-i", "--input"), action="store", dest="input", help="Input .bed.gz file produced by prepMat"), - make_option(c("-o", "--output"), action="store", dest="output", help="Output file"), - make_option(c("-w", "--work_dir"), action="store", dest="work_dir", help="Working directory") -) - -parser <- OptionParser(usage="%prog [options] file", option_list=option_list) -args <- parse_args(parser, positional_arguments=TRUE) -opt <- args$options - -data_table <- read.table(opt$input) -as.matrix(data_table) -status <- match(data_table[,3], missing) -data_table[,3] <- paste(opt$work_dir, data_table[,1], ".", data_table[,2], ".bed.gz", sep="") -data_table <- data_table[is.na(status)==TRUE,] -write.table(array(data_table, dim=c(length(data_table)/3, 3)), file=opt$output, quote=FALSE, row.names=FALSE, col.names=FALSE) diff -r d6ab97fb8aca -r 029e18c3c17b ideas.xml --- a/ideas.xml Fri Aug 25 12:22:59 2017 -0400 +++ b/ideas.xml Tue Aug 29 09:46:32 2017 -0400 @@ -15,29 +15,23 @@ ############################################## ## Create the config file and prepare the data ############################################## -#set input_type = $input_type_cond.input_type -#if str($input_type) == "datasets": - #set cell_type_epigenetic_factor_cond = $input_type_cond.cell_type_epigenetic_factor_cond - #set cell_type_epigenetic_factor = $cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor - #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond - #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window - cp '$gen_prep_input_config' $prep_input_config && - prepMat - $prep_input_config - #if str($specify_genomic_window) == "yes": - -bed '$specify_genomic_window_cond.bed_input' - #else: - -gsz '$chromInfo' - -wsz $specify_genomic_window_cond.window_size - #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes - #if str($restrict_chromosomes) == "yes": - #set chroms = [] - #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat - #for $i in $chrom_repeat.chrom - $chroms.append($i) - #end for - -chr ",".join(chroms) - #end if +#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window +cp '$gen_prep_input_config' $prep_input_config && +prepMat +$prep_input_config +#if str($specify_genomic_window) == "yes": + -bed '$specify_genomic_window_cond.bed_input' +#else: + -gsz '$chromInfo' + -wsz $specify_genomic_window_cond.window_size + #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes + #if str($restrict_chromosomes) == "yes": + #set chroms = [] + #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat + #for $i in $chrom_repeat.chrom + $chroms.append($i) + #end for + -chr ",".join(chroms) #end if #end if $bychr @@ -48,31 +42,19 @@ $norm ############################################## ## Coerce the prepMat config output to the -## format expected by the R matrix builder. +## format expected by IDEAS. ############################################## && cut -d' ' $prep_input_config -f1,2 > file1.txt && ls tmp/*.bed.gz > file2.txt -&& paste <(cat file1.txt) <(cat file2.txt) > $prep_output_config -############################################## -## Build the R matrix from the prepMat output -############################################## -##&& Rscript '$__tool_directory__/build_matrix.R' -##-i $tmp_dir/*.bed.gz -##-o $ideas_matrix_input_file -##-w $ideas_input_dir +&& paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config ############################################## ## Run IDEAS ############################################## && ideas '$prep_output_config' -#if str($input_type) == "datasets": - #set specify_genomic_window_cond = $input_type_cond.specify_genomic_window_cond - #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window - #if str($specify_genomic_window) == "yes": - '$specify_genomic_window_cond.bed_input' - #else: - $tmp_dir/*.bed - #end if +#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window +#if str($specify_genomic_window) == "yes": + '$specify_genomic_window_cond.bed_input' #else: $tmp_dir/*.bed #end if @@ -115,93 +97,81 @@ ]]> = 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext - #set file_name = $file_name_with_ext.split(".")[0] - #if str($input_name_positions) == "cell_first": - #set cell_type_name = $file_name.split("-")[0] - #set epigenetic_factor_name = $file_name.split("-")[1] - #else: - #set cell_type_name = $file_name.split("-")[1] - #set epigenetic_factor_name = $file_name.split("-")[0] - #end if +#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract": + #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions + #for $i in $cell_type_epigenetic_factor_cond.input: + #set file_name_with_ext = $i.name + #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext + #set file_name = $file_name_with_ext.split(".")[0] + #if str($input_name_positions) == "cell_first": + #set cell_type_name = $file_name.split("-")[0] + #set epigenetic_factor_name = $file_name.split("-")[1] + #else: + #set cell_type_name = $file_name.split("-")[1] + #set epigenetic_factor_name = $file_name.split("-")[0] + #end if ${cell_type_name} ${epigenetic_factor_name} ${i} - #end for - #else: - #for $input_items in $input_type_cond.cell_type_epigenetic_factor_cond.input_repeat: + #end for +#else: + #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat: ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} - #end for - #end if + #end for #end if]]> - - - - + + + + - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - + + + @@ -236,6 +206,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + **What it does** diff -r d6ab97fb8aca -r 029e18c3c17b test-data/e001-h3k4me3.bigwig Binary file test-data/e001-h3k4me3.bigwig has changed diff -r d6ab97fb8aca -r 029e18c3c17b test-data/genomic_windows.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genomic_windows.bed Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,50 @@ +chr1 21819600 21819800 R100001 +chr1 21819800 21820000 R100002 +chr1 21820000 21820200 R100003 +chr1 21820200 21820400 R100004 +chr1 21820400 21820600 R100005 +chr1 21820600 21820800 R100006 +chr1 21820800 21821000 R100007 +chr1 21821000 21821200 R100008 +chr1 21821200 21821400 R100009 +chr1 21821400 21821600 R100010 +chr1 21821600 21821800 R100011 +chr1 21821800 21822000 R100012 +chr1 21822000 21822200 R100013 +chr1 21822200 21822400 R100014 +chr1 21822400 21822600 R100015 +chr1 21822600 21822800 R100016 +chr1 21822800 21823000 R100017 +chr1 21823000 21823200 R100018 +chr1 21823200 21823400 R100019 +chr1 21823400 21823600 R100020 +chr1 21823600 21823800 R100021 +chr1 21823800 21824000 R100022 +chr1 21824000 21824200 R100023 +chr1 21824200 21824400 R100024 +chr1 21824400 21824600 R100025 +chr1 21824600 21824800 R100026 +chr1 21824800 21825000 R100027 +chr1 21825000 21825200 R100028 +chr1 21825200 21825400 R100029 +chr1 21825400 21825600 R100030 +chr1 21825600 21825800 R100031 +chr1 21825800 21826000 R100032 +chr1 21826000 21826200 R100033 +chr1 21826200 21826400 R100034 +chr1 21826400 21826600 R100035 +chr1 21826600 21826800 R100036 +chr1 21826800 21827000 R100037 +chr1 21827000 21827200 R100038 +chr1 21827200 21827400 R100039 +chr1 21827400 21827600 R100040 +chr1 21827600 21827800 R100041 +chr1 21827800 21828000 R100042 +chr1 21828000 21828200 R100043 +chr1 21828200 21828400 R100044 +chr1 21828400 21828600 R100045 +chr1 21829000 21829200 R100046 +chr1 21829400 21829600 R100047 +chr1 21829600 21829800 R100048 +chr1 21829800 21830000 R100049 +chr1 21830000 21830200 R100050 diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_cluster.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_cluster.txt Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,1 @@ +id0 0:0 diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_para.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_para.tabular Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,2 @@ +#count h3k4me3 h3k4me3*h3k4me3 +60.000000 0.000000 10.000000 diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_profile.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_profile.txt Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,1 @@ +50 0.943136 diff -r d6ab97fb8aca -r 029e18c3c17b test-data/output_state.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_state.txt Tue Aug 29 09:46:32 2017 -0400 @@ -0,0 +1,51 @@ +#ID CHR POSst POSed e001 PosClass +R100001 chr1 21819600 21819800 0 0 +R100002 chr1 21819800 21820000 0 0 +R100003 chr1 21820000 21820200 0 0 +R100004 chr1 21820200 21820400 0 0 +R100005 chr1 21820400 21820600 0 0 +R100006 chr1 21820600 21820800 0 0 +R100007 chr1 21820800 21821000 0 0 +R100008 chr1 21821000 21821200 0 0 +R100009 chr1 21821200 21821400 0 0 +R100010 chr1 21821400 21821600 0 0 +R100011 chr1 21821600 21821800 0 0 +R100012 chr1 21821800 21822000 0 0 +R100013 chr1 21822000 21822200 0 0 +R100014 chr1 21822200 21822400 0 0 +R100015 chr1 21822400 21822600 0 0 +R100016 chr1 21822600 21822800 0 0 +R100017 chr1 21822800 21823000 0 0 +R100018 chr1 21823000 21823200 0 0 +R100019 chr1 21823200 21823400 0 0 +R100020 chr1 21823400 21823600 0 0 +R100021 chr1 21823600 21823800 0 0 +R100022 chr1 21823800 21824000 0 0 +R100023 chr1 21824000 21824200 0 0 +R100024 chr1 21824200 21824400 0 0 +R100025 chr1 21824400 21824600 0 0 +R100026 chr1 21824600 21824800 0 0 +R100027 chr1 21824800 21825000 0 0 +R100028 chr1 21825000 21825200 0 0 +R100029 chr1 21825200 21825400 0 0 +R100030 chr1 21825400 21825600 0 0 +R100031 chr1 21825600 21825800 0 0 +R100032 chr1 21825800 21826000 0 0 +R100033 chr1 21826000 21826200 0 0 +R100034 chr1 21826200 21826400 0 0 +R100035 chr1 21826400 21826600 0 0 +R100036 chr1 21826600 21826800 0 0 +R100037 chr1 21826800 21827000 0 0 +R100038 chr1 21827000 21827200 0 0 +R100039 chr1 21827200 21827400 0 0 +R100040 chr1 21827400 21827600 0 0 +R100041 chr1 21827600 21827800 0 0 +R100042 chr1 21827800 21828000 0 0 +R100043 chr1 21828000 21828200 0 0 +R100044 chr1 21828200 21828400 0 0 +R100045 chr1 21828400 21828600 0 0 +R100046 chr1 21829000 21829200 0 0 +R100047 chr1 21829400 21829600 0 0 +R100048 chr1 21829600 21829800 0 0 +R100049 chr1 21829800 21830000 0 0 +R100050 chr1 21830000 21830200 0 0