Mercurial > repos > greg > ideas2

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,17 @@
+name: ideas
+owner: greg
+description: |
+  Contains a tool that employs the IDEAS (Integrative and Discriminative Epigenome Annotation System) method for jointly and
+  quantitatively characterizing multivariate epigenetic landscapes in many cell types, tissues or conditions
+homepage_url: http://sites.stat.psu.edu/~yzz2/IDEAS/
+long_description: |
+  Contains a tool that employs the IDEAS (Integrative and Discriminative Epigenome Annotation System) method for jointly and
+  quantitatively characterizing multivariate epigenetic landscapes in many cell types, tissues or conditions. The method
+  accounts for position dependent epigenetic events and detects local cell type relationships, which not only help to improve
+  the accuracy of annotating functional classes of DNA sequences, but also reveal cell type constitutive and specific loci.
+  The method utilizes Bayesian non-parametric techniques to automatically identify the best model size fitting to the data so
+  users do not have to specify the number of states. On the other hand, users can still specify the number of states if desired.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/epigenetics/ideas
+type: unrestricted
+categories:
+- Epigenetics
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create_heatmap.R	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,129 @@
+#!/usr/bin/env Rscript
+
+build_state_color_codes_vector <- function(data_matrix, histone_mark_color, color_code_type="rgb") {
+    # Build one color code string per state (row) of data_matrix by
+    # mixing the per-column colors given in histone_mark_color (one
+    # row of red, green, blue values per column of data_matrix).
+    # The returned vector holds rgb strings (e.g., 255,255,0) when
+    # color_code_type is "rgb" and hex code strings (e.g., #FFFFFF)
+    # for any other value.
+    #
+    # Rescale every row against its own minimum (shifted by 1e-10)
+    # and maximum, then raise the result to the fifth power.
+    row_low = apply(data_matrix, 1, min) + 1e-10;
+    row_high = apply(data_matrix, 1, max);
+    weights = ((data_matrix - row_low) / (row_high - row_low))^5;
+    # The weights carry no row names.
+    rownames(weights) = NULL;
+    # With more than one column, make each row sum to (almost) one.
+    if(dim(weights)[2] > 1) {
+        weights = weights / (apply(weights, 1, sum) + 1e-10);
+    }
+    blended = weights%*%histone_mark_color;
+    # Scale each state's color by its row maximum, rescaled to [0, 1]
+    # across all states, and clamp negative channel values to zero.
+    strength = row_high;
+    strength = (strength - min(strength)) / (max(strength) - min(strength) + 1e-10);
+    blended = round(255 - (255 - blended) * strength/0.5);
+    blended[blended<0] = 0;
+    if (!identical(color_code_type, "rgb")) {
+        # Convert each row to a hex code string such as #D96200 by
+        # going through the hsv representation of the rgb triple.
+        to_hex = function(row_index) {
+            hsv_values = rgb2hsv(blended[row_index, 1], blended[row_index, 2], blended[row_index, 3]);
+            hsv(hsv_values[1], hsv_values[2], hsv_values[3]);
+        }
+        return(vapply(seq_len(dim(blended)[1]), to_hex, character(1)));
+    }
+    # Comma-separated rgb strings such as 255,255,255 or 217,98,0.
+    return(paste(blended[,1], blended[,2], blended[,3], sep=","));
+}
+
+create_heatmap <- function(data_frame, output_file_name, colors=c("white", "dark blue")) {
+    # Plot a heatmap for a .para / .state combination based on the
+    # received data_frame which was created by reading the .para file,
+    # and write it to the PDF file named output_file_name.  colors
+    # gives the two ends of the color ramp used for the heatmap cells.
+    # The PDF device is closed and the previous palette is restored
+    # even when plotting fails.  Returns NULL invisibly.
+    num_columns = dim(data_frame)[2];
+    num_rows = dim(data_frame)[1];
+    # Solve num_columns = 1 + p + p*(p+1)/2 for p.
+    p = (sqrt(9 + 8 * (num_columns-1)) - 3) / 2;
+    # Columns 2..p+1 divided by column 1.
+    data_matrix = as.matrix(data_frame[,1+1:p] / data_frame[,1]);
+    state_colors_vector = get_state_color_codes_vector(data_frame, colors=colors, color_code_type="hex");
+    # Open the output PDF file and guarantee that it gets closed.
+    pdf(file=output_file_name);
+    on.exit(dev.off(), add=TRUE);
+    # rownames(data_matrix) are the state indexes,
+    # and will look something like this:
+    # 0 (5.89%) 1 (91.78%) 2 (1.48%) 3 (0.86%)
+    rownames(data_matrix) = paste(1:num_rows-1, " (", round(data_frame[,1]/sum(data_frame[,1])*10000)/100, "%)", sep="");
+    # Set graphical parameters.
+    par(mar=c(6, 1, 1, 6));
+    # Create a vector containing the minimum and maximum values in data_matrix.
+    min_max_vector = range(data_matrix);
+    # Create a color palette and install it, remembering the previous
+    # palette so that it can be put back when the function exits.
+    my_palette = colorRampPalette(colors)(n=100);
+    default_palette = palette(my_palette);
+    on.exit(palette(default_palette), add=TRUE);
+    # Plot the heatmap for the current .para / .state combination.
+    plot(NA, NA, xlim=c(0, p+0.7), ylim=c(0, num_rows), xaxt="n", yaxt="n", xlab=NA, ylab=NA, frame.plot=FALSE);
+    axis(1, at=1:p-0.5, labels=colnames(data_matrix), las=2);
+    axis(4, at=1:num_rows-0.5, labels=rownames(data_matrix), las=2);
+    # Map every cell onto a palette index between 0 and 100.
+    col = round((t(data_matrix) - min_max_vector[1]) / (min_max_vector[2] - min_max_vector[1]) * 100);
+    # One rectangle per cell, followed by one swatch per state drawn
+    # to the right of the grid in that state's color.
+    rect(rep(1:p-1, num_rows), rep(1:num_rows-1, each=p), rep(1:p, num_rows), rep(1:num_rows, each=p), col=col);
+    rect(rep(p+0.2, num_rows), 1:num_rows-0.8, rep(p+0.8, num_rows), 1:num_rows-0.2, col=state_colors_vector);
+    invisible(NULL);
+}
+
+get_state_color_codes_vector <- function(data_frame, colors=c("white", "dark blue"), color_code_type="rgb") {
+    # Return a vector of color strings for each row in data_frame.
+    # These string will either be rgb (e.g., 255,255,0) or hex codes
+    # (e.g., #FFFFFF), depending on the value of color_code_type.
+    # The colors argument is accepted for symmetry with create_heatmap
+    # but is not referenced in this function.
+    num_columns = dim(data_frame)[2];
+    # Solve num_columns = 1 + p + p*(p+1)/2 for p.
+    p = (sqrt(9 + 8 * (num_columns-1)) - 3) / 2;
+    data_matrix = as.matrix(data_frame[,1+1:p] / data_frame[,1]);
+    # colnames(data_matrix) will look something like this:
+    # H3K4me3 H3K4me1 DNase H3K79me2
+    colnames(data_matrix) = colnames(data_frame)[1+1:p];
+    histone_marks = tolower(colnames(data_matrix));
+    # Default colors, one row of red, green, blue values per mark.
+    histone_mark_color = t(col2rgb(terrain.colors(ceiling(p))[1:p]));
+    # Specify colors for common feature names like "h3k4me3".
+    # These are histone marks frequently used to identify
+    # promoter activities in a cell, and are often displayed
+    # in shades of red.  The entries are applied in order, so
+    # when a name contains more than one of these substrings
+    # the entry listed last wins.
+    known_mark_colors = list(
+        h3k4me3=c(255, 0, 0),
+        h3k4me2=c(250, 100, 0),
+        h3k4me1=c(250, 250, 0),
+        h3k36me3=c(0, 150, 0),
+        h2a=c(0, 150, 150),
+        dnase=c(0, 200, 200),
+        h3k9ac=c(250, 0, 200),
+        h3k9me3=c(100, 100, 100),
+        h3k27ac=c(250, 150, 0),
+        h3k27me3=c(0, 0, 200),
+        h3k79me2=c(200, 0, 200),
+        h4k20me1=c(50, 200, 50),
+        ctcf=c(200, 0, 250)
+    );
+    for (mark_substring in names(known_mark_colors)) {
+        for (i in which(grepl(mark_substring, histone_marks, fixed=TRUE))) {
+            histone_mark_color[i,] = known_mark_colors[[mark_substring]];
+        }
+    }
+    # Build the state colors once, after every mark color is final.
+    state_colors_vector = build_state_color_codes_vector(data_matrix, histone_mark_color, color_code_type=color_code_type);
+    return(state_colors_vector);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create_heatmaps.R	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,35 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+
+# Command line options accepted by this script.
+option_list <- list(
+    make_option(c("--input_dir"), action="store", dest="input_dir", help="IDEAS para files directory"),
+    make_option(c("--output_dir"), action="store", dest="output_dir", help="PDF output directory"),
+    make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"),
+    make_option(c("--in_training_mode"), action="store_true", dest="in_training_mode", default=FALSE, help="Flag for training mode")
+)
+
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
+args <- parse_args(parser, positional_arguments=TRUE);
+opt <- args$options;
+
+# Load create_heatmap() from the script directory.
+heatmap_path = paste(opt$script_dir, "create_heatmap.R", sep="/");
+source(heatmap_path);
+
+# Training mode reads *.para0 files, otherwise *.para files.  The same
+# anchored pattern selects the files and strips the extension below.
+if (opt$in_training_mode) {
+    pattern = "\\.para0$";
+} else {
+    pattern = "\\.para$";
+}
+para_files = list.files(path=opt$input_dir, pattern=pattern, full.names=TRUE);
+# seq_along() makes this a no-op when no para file was found.
+for (i in seq_along(para_files)) {
+    para_file = para_files[i];
+    # Use the file name only, whatever the depth of input_dir, and
+    # remove just the trailing extension.
+    output_file_base_name = sub(pattern, "", basename(para_file));
+    output_file_name = paste(output_file_base_name, "state", i, "pdf", sep=".");
+    output_file_path = paste(opt$output_dir, output_file_name, sep="/");
+    data_frame = read.table(para_file, comment.char="!", header=TRUE);
+    create_heatmap(data_frame, output_file_path);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ideas.R	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,439 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("data.table"))
+suppressPackageStartupMessages(library("optparse"))
+
+# Command line options accepted by this script.  Options without a
+# default are NULL in opt when they are not given on the command line.
+option_list <- list(
+    make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
+    make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
+    make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
+    make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
+    make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
+    make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
+    make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
+    make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
+    make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
+    make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
+    make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"),
+    make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
+    make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
+    make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
+    make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"),
+    make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
+    make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
+    make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
+    make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
+    make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
+    make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
+    make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
+    make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
+    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training windows")
+)
+
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
+args <- parse_args(parser, positional_arguments=TRUE)
+opt <- args$options
+
+add_output_redirect <- function(cmd, output_log) {
+    # Return cmd as a single string with a redirect appended that
+    # sends both stdout and stderr to output_log in append mode.
+    # cmd may be a character vector, in which case its items are
+    # joined with single spaces.
+    # The portable ">> file 2>&1" form is used because system() runs
+    # the command through /bin/sh, and the bash-only "&>>" operator
+    # is read by other shells as "run in the background, then append
+    # nothing to file".
+    new_cmd = c(cmd, ">>", output_log, "2>&1");
+    return(paste(new_cmd, collapse=" "));
+}
+
+combine_state <- function(parafiles, method="ward.D", mycut=0.9, pcut=1.0) {  # Merge the rows of several para files (and their profile files) into one list with $para and $profile.
+    X = NULL;  # Stacked, column-aligned rows from every para file.
+    K = NULL;  # Number of rows read from each file.
+    I = NULL;  # For each row of X, the index of the file it came from.
+    myheader = NULL;  # Column names of the first file; the reference column order.
+    p = NULL;  # Derived from the header length below (presumably the number of marks; TODO confirm).
+    for(i in 1:length(parafiles)) {
+        x = fread(parafiles[i]);
+        t = max(which(is.na(x[1,])==F));  # Last column whose first-row value is not NA.
+        x = as.matrix(x[,1:t]);
+        if(i==1) {
+            myheader = colnames(x);
+            p = sqrt(9/4-2*(1-length(myheader))) - 3 / 2;  # Solves length(myheader) = 1 + p + p*(p+1)/2 for p.
+        }
+        m = match(myheader[1:p+1], colnames(x)[1:p+1]);  # Where the reference columns 2..p+1 sit among this file's columns 2..p+1.
+        v = NULL;
+        for(ii in 1:p) {
+            for(jj in 1:ii) {
+                a = max(m[ii],m[jj]);
+                b = min(m[ii],m[jj]);
+                v = c(v, a*(a+1)/2+b-a);  # Position of the pair (a, b), a >= b, in a row-wise lower-triangle ordering.
+            }
+        }
+        X = rbind(X, array(as.matrix(x[, c(1, 1+m, 1+p+v)]), dim=c(length(x) / (1+p+length(v)), 1 + p + length(v))));  # Column 1, the matched columns, then the pair columns.
+        K = c(K, dim(x)[1]);
+        I = c(I, rep(i, dim(x)[1]));
+    }
+    N = length(parafiles);
+    p = (sqrt(1 + dim(X)[2] * 8) - 3) / 2;  # Same p, recomputed from the column count of X.
+    omycut = mycut;
+    mycut = round(length(parafiles) * mycut);  # Turn the fraction into a number of files.
+    M = array(X[,1:p+1] / X[,1], dim=c(dim(X)[1], p));  # Columns 2..p+1 of each row divided by column 1.
+    V = array(0, dim=c(dim(X)[1] * p, p));  # One p x p block per row of X, stacked vertically.
+    for(i in 1:dim(X)[1]) {
+        t = (i - 1) * p;  # Row offset of block i inside V.
+        l = 1;
+        for(j in 1:p) {
+            for(k in 1:j) {
+                V[t+j, k] = V[t+k, j] = X[i,1+p+l] / X[i,1] - M[i,j] * M[i,k];  # Symmetric fill: scaled pair column minus the product of the two M entries.
+                l = l + 1;
+            }
+        }
+        V[t+1:p,] = t(solve(chol(V[t+1:p,] + diag(1e-1,p))));  # Replace the block by the transposed inverse Cholesky factor of (block + 0.1 * identity).
+    }
+    D = array(0, dim=rep(dim(X)[1],2));  # Symmetric pairwise distance matrix between rows of X.
+    for(i in 2:dim(X)[1]) {
+        for(j in 1:(i-1)) {
+            D[i,j] = D[j,i] = sqrt((sum((V[(i-1)*p+1:p,]%*%(M[i,]-M[j,]))^2) + sum((V[(j-1)*p+1:p,]%*%(M[i,]-M[j,]))^2)));  # Difference of the two M rows, transformed by each row's V block.
+        }
+    }
+    MM = NULL;  # make_parameter results of every leave-one-out pass, tagged with the left-out file index.
+    kk = NULL;  # Best cluster count found in each leave-one-out pass.
+    for(i in 1:N) {
+        t = 1:K[i];
+        if(i > 1) {
+            t = t + sum(K[1:(i-1)]);  # Rows of X that belong to file i.
+        }
+        t = (1:dim(D)[1])[-t];  # All rows except those of file i.
+        h = hclust(as.dist(D[t,t]), method=method);
+        k = -1;
+        tM = NULL;
+        for(j in min(K):(min(length(t), max(K)*2))) {
+            m = cutree(h,k=j);
+            tt = NULL;
+            for(l in 1:j) {
+                tt[l] = length(unique(I[t[which(m==l)]]));  # Number of distinct files contributing to cluster l.
+            }
+            tk = length(which(tt>=mycut));  # Clusters backed by at least mycut files.
+            if(tk > k) {
+                k = tk;
+                tM = make_parameter(1:j, I[t], m, mycut, X[t,]);
+            } else if(tk < k) {
+                break;  # Stop as soon as the count drops.
+            }
+        }
+        kk[i] = k;
+        MM = rbind(MM, cbind(i, tM));
+    }
+    mysel = median(kk);
+    h = hclust(as.dist(D), method=method);  # Cluster the rows of all files together.
+    rt = rep(0, max(K)*2);
+    k = -1;
+    for(i in min(K):min(dim(D)[1], max(K)*2)) {
+        m = cutree(h,k=i);
+        tt = NULL;
+        for(j in 1:i) {
+            tt[j] = length(unique(I[which(m==j)]));
+        }
+        tk = length(which(tt>=mycut));
+        if(tk==mysel | tk<k) {
+            break;
+        }
+        k = tk;
+        rt[i] = length(which(tt>=mycut));
+    }
+    mysel = max(k,tk);
+    m = cutree(h, k=mysel);  # Cluster membership used for the rest of the function, including the profile pass.
+    nn = NULL;
+    for(i in 1:mysel) {
+        t = which(m==i);
+        nn[i] = sum(X[t,1]);  # Total of column 1 over cluster i.
+    }
+    oo = order(nn, decreasing=T);  # Clusters ordered by that total, largest first.
+    rt = make_parameter(oo, I, m, mycut, X);
+    onstate = max(rt[,1]) + 1;  # Number of labels assigned by make_parameter (labels start at 0).
+    ooo = NULL;  # Clusters, in oo order, backed by at least mycut files.
+    for(i in oo) {
+        t = which(m==i);
+        if(length(unique(I[t])) >= mycut) {
+            ooo = c(ooo, i);
+        }
+    }
+    d = NULL;
+    for(i in 1:N) {
+        d = rbind(d, compare_two(rt, MM[MM[,1]==i,-1])[1:onstate]);  # First onstate entries of compare_two against leave-one-out result i.
+    }
+    dd = array(cutree(hclust(dist(c(d))), k=2), dim=dim(d));  # Split all entries of d into two groups.
+    kk = table(c(dd));
+    kk = which(as.integer(kk)==max(as.integer(kk)))[1];  # Label of the larger group.
+    pp = apply(dd, 2, function(x){length(which(x!=kk))/length(x)});  # Per column, fraction of entries outside the larger group.
+    pp0 = apply(d, 2, function(x){length(which(x>0.5))/length(x)});  # Per column, fraction of entries above 0.5.
+    pp[pp0<pp] = pp0[pp0<pp];  # Keep the smaller of the two fractions.
+    t = which(pp > pcut);  # Columns to drop.
+    if(length(t) > 0) {
+        j = 0;
+        nrt = NULL;
+        for(i in (1:onstate-1)[-t]) {
+            nrt = rbind(nrt, cbind(j, rt[rt[,1]==i,-1]));  # Keep the label and renumber from 0.
+            j = j + 1;
+        }
+        rt = nrt;
+        ooo = ooo[-t];
+    }
+    nrt = NULL;
+    for(i in 0:max(rt[,1])) {
+        t = which(rt[,1]==i);
+        nrt = rbind(nrt, apply(array(rt[t,], dim=c(length(t), dim(rt)[2])), 2, sum)[-1]);  # Column sums over the rows of label i, label column removed.
+    }
+    rt = nrt;
+    colnames(rt) = myheader;
+    O = NULL;  # Rows of the combined profile.
+    Ip = NULL;  # For each row of Xp, the index of the file it came from.
+    Xp = NULL;  # Stacked profile rows from every file.
+    k = 0;  # Number of para rows consumed by the files handled so far.
+    for(i in 1:length(parafiles)) {
+        str = gsub(".para", ".profile", parafiles[i]);  # Name of the matching profile file.
+        p = as.matrix(read.table(str));  # NOTE: p now holds the profile matrix, no longer the count computed above.
+        u = array(0, dim=c(dim(p)[1], length(ooo)));
+        for(j in 1:length(ooo)) {
+            t = which(m[k+1:K[i]] == ooo[j]);  # Rows of file i that fall into cluster ooo[j].
+            u[,j] = apply(array(p[,1+t], dim=c(dim(p)[1], length(t))), 1, sum);  # Row sums over the profile columns 1+t.
+        }
+        k = k + K[i];
+        u = u / (apply(u, 1, sum) + 1e-10);  # Make each row sum to (almost) one.
+        Xp = rbind(Xp, cbind(p[,1], u));
+        Ip = c(Ip, rep(i,dim(u)[1]));
+    }
+    hp = hclust(dist(((Xp[,-1]+min(1e-3, min(Xp[,-1][Xp[,-1]>0]))))), method=method);
+    ocut = min(mycut/2, length(parafiles)/2);
+    t = range(as.integer(table(Ip)));  # Smallest and largest number of profile rows per file.
+    Kp = NULL;
+    for(i in t[1]:(t[2]*2)) {
+        m = cutree(hp, k=i);
+        tt = table(Ip,m);
+        ll = apply(tt, 2, function(x){length(which(x>0))});  # Number of files present in each cluster.
+        Kp = c(Kp, length(which(ll>=ocut)));
+    }
+    oN = (t[1]:(t[2]*2))[which(Kp==max(Kp))[1]];  # First cluster count reaching the maximum of Kp.
+    m = cutree(hp, k=oN);
+    tt = table(Ip,m);
+    ll = apply(tt, 2, function(x){length(which(x>0))});
+    tt = which(ll>=ocut);
+    for(i in tt) {
+        t = which(m==i);
+        O = rbind(O, c(sum(Xp[t, 1]), apply(array(Xp[t,-1]*Xp[t,1], dim=c(length(t), dim(Xp)[2]-1)), 2, sum)/sum(Xp[t, 1])));  # Total of column 1, then the column-1-weighted average of the other columns.
+    }
+    nrt = NULL;
+    nrt$para = rt;  # Assigning with $ turns nrt into a list.
+    nrt$profile = O;
+    return(nrt);
+}
+
+compare_two <- function(n, m) {
+    # For every row of get_mean(n), find the Euclidean distance to the
+    # closest row of get_mean(m), and then the same with the roles of
+    # n and m swapped.  Only the first p columns of the means are
+    # compared.  The first block of the returned vector belongs to n,
+    # the second block to m.
+    means_n = get_mean(n);
+    means_m = get_mean(m);
+    # Solve dim(n)[2] = 2 + p + p*(p+1)/2 for p.
+    p = (-3 + sqrt(9 + 8 * (dim(n)[2] - 2))) / 2;
+    nearest_distance = function(target, candidates) {
+        candidate_rows = array(candidates[,1:p], dim=c(dim(candidates)[1], p));
+        min(apply(candidate_rows, 1, function(x){sqrt(sum((x-target)^2))}));
+    }
+    count_n = dim(means_n)[1];
+    distances = NULL;
+    for (row in 1:count_n) {
+        distances[row] = nearest_distance(means_n[row,1:p], means_m);
+    }
+    for (row in 1:dim(means_m)[1]) {
+        distances[row+count_n] = nearest_distance(means_m[row,1:p], means_n);
+    }
+    return(distances);
+}
+
+get_base_cmd <- function(ideas_input_config, chrom_bed_input, training_iterations, bychr, hp, standardize_datasets, log2,
+        max_states, initial_states, max_position_classes, max_cell_type_clusters, prior_concentration,
+        burnin_num, mcmc_num, minerr, maxerr, rseed, thread) {
+    # Assemble the part of the ideas command line that is shared by
+    # every run and return it as a single space-separated string.
+    # Arguments that are NULL do not contribute an option, except for
+    # ideas_input_config, burnin_num, mcmc_num, rseed and thread,
+    # which are always written.
+    append_items = function(cmd, ...) {
+        paste(cmd, ..., sep=" ");
+    }
+    # Add "flag value" only when a value was supplied.
+    append_valued = function(cmd, flag, value) {
+        if (is.null(value)) {
+            return(cmd);
+        }
+        append_items(cmd, flag, value);
+    }
+    # Add a bare flag only when it is switched on.
+    append_switch = function(cmd, enabled, flag) {
+        if (enabled) {
+            return(append_items(cmd, flag));
+        }
+        cmd;
+    }
+    cmd = append_items("ideas", ideas_input_config);
+    if (!is.null(chrom_bed_input)) {
+        cmd = append_items(cmd, chrom_bed_input);
+    }
+    # Training runs are started with imputation switched off.
+    if (!is.null(training_iterations)) {
+        cmd = append_items(cmd, "-impute none");
+    }
+    cmd = append_switch(cmd, bychr, "-bychr");
+    cmd = append_switch(cmd, hp, "-hp");
+    cmd = append_switch(cmd, standardize_datasets, "-norm");
+    cmd = append_valued(cmd, "-log2", log2);
+    cmd = append_valued(cmd, "-G", max_states);
+    cmd = append_valued(cmd, "-C", initial_states);
+    cmd = append_valued(cmd, "-P", max_position_classes);
+    cmd = append_valued(cmd, "-K", max_cell_type_clusters);
+    cmd = append_valued(cmd, "-A", prior_concentration);
+    cmd = append_items(cmd, "-sample", burnin_num, mcmc_num);
+    cmd = append_valued(cmd, "-minerr", minerr);
+    cmd = append_valued(cmd, "-maxerr", maxerr);
+    cmd = append_items(cmd, "-rseed", rseed);
+    cmd = append_items(cmd, "-thread", thread);
+    return(cmd);
+}
+
+get_mean <- function(n) {
+    # Group the rows of n by the value in column 1, sum columns 2
+    # onwards within each group, and divide every remaining sum by
+    # the group's column-2 sum.  Returns one row per distinct
+    # column-1 value (in ascending order) and dim(n)[2]-2 columns.
+    num_columns = dim(n)[2];
+    group_sums = lapply(sort(unique(n[,1])), function(label) {
+        rows = which(n[,1]==label);
+        # array() keeps a single selected row two-dimensional.
+        apply(array(n[rows,], dim=c(length(rows), num_columns)), 2, sum)[-1];
+    });
+    totals = do.call(rbind, group_sums);
+    ratios = totals[,-1] / totals[,1];
+    return(array(ratios, dim=c(length(ratios)/(num_columns-2), num_columns-2)));
+}
+
+get_post_training_base_cmd <- function(base_cmd, para) {
+    # Adapt base_cmd for the run that follows training: the value of
+    # -G becomes length(para)-1 (the option is appended when it is
+    # absent), and -C together with its value is removed.
+    tokens = strsplit(base_cmd[1], split=" ", fixed=TRUE)[[1]];
+    g_value = length(para)-1;
+    g_position = which(tokens == "-G");
+    if (length(g_position) > 0) {
+        # Overwrite the value that follows -G.
+        tokens[g_position + 1] = g_value;
+    } else {
+        tokens = c(tokens, "-G", g_value);
+    }
+    c_position = which(tokens == '-C');
+    if(length(c_position) > 0) {
+        # Drop the flag and the value that follows it.
+        tokens = tokens[-c(c_position, c_position+1)];
+    }
+    return(paste(tokens, collapse=" "));
+}
+
+get_windows_by_chrom <- function(chromosome_windows) {
+    # Read the file named by chromosome_windows and return its lines
+    # as a character vector, one item per line.
+    fh = file(chromosome_windows, "r");
+    # Close the connection even when reading fails.
+    on.exit(close(fh));
+    return(readLines(fh));
+}
+
+make_parameter <- function(myorder, id, mem, mycut, para) {
+    # Walk through the cluster labels in myorder and collect the rows
+    # of para that belong to each cluster whose members come from at
+    # least mycut distinct values of id.  Every kept cluster gets the
+    # next label, counting from 0, in a leading column named j.
+    # Returns NULL when no cluster qualifies.
+    kept_rows = NULL;
+    next_label = 0;
+    for(cluster in myorder) {
+        members = which(mem==cluster);
+        if (length(unique(id[members])) < mycut) {
+            next;
+        }
+        # array() keeps a single selected row two-dimensional.
+        cluster_rows = array(para[members,], dim=c(length(members), dim(para)[2]));
+        kept_rows = rbind(kept_rows, cbind(j=next_label, cluster_rows));
+        next_label = next_label + 1;
+    }
+    return(kept_rows);
+}
+
+remove_files <- function(path, pattern) {
+    # Delete every file in the directory path whose name matches the
+    # regular expression pattern.  Returns NULL invisibly.
+    # full.names=TRUE is required so that the files are removed from
+    # path itself rather than looked up in the working directory.
+    files = list.files(path=path, pattern=pattern, full.names=TRUE);
+    unlink(files);
+    invisible(NULL);
+}
+
+run_cmd <- function(cmd, save_ideas_log, output_log, output_dir) {
+    # Execute cmd through system().  A zero exit status simply
+    # returns.  Any other status ends the R session with that status;
+    # before quitting, and only when save_ideas_log is NULL, the file
+    # output_log is moved into output_dir.
+    exit_status = system(cmd);
+    if (exit_status == 0) {
+        return(invisible(NULL));
+    }
+    if (is.null(save_ideas_log)) {
+        file.rename(output_log, paste(output_dir, output_log, sep="/"));
+    }
+    quit(save="no", status=exit_status);
+}
+
+# Initialize values.
+if (is.null(opt$save_ideas_log)) {
+    # The log is not kept as an output, so it goes to a file in the
+    # working directory; run_cmd moves it into output_dir on failure.
+    output_log = "ideas_log.txt";
+} else {
+    output_log = opt$output_log;
+}
+if (is.null(opt$chromosome_windows)) {
+    windows_by_chrom = NULL;
+} else {
+    # Read chromosome_windows.txt into memory.
+    windows_by_chrom = get_windows_by_chrom(opt$chromosome_windows);
+}
+base_cmd = get_base_cmd(opt$ideas_input_config, opt$chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp,
+            opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
+            opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
+            opt$maxerr, opt$rseed, opt$thread);
+output_base_name = opt$project_name;
+
+run_ideas <- function(cmd_items, output_name, output_log, opt) {
+    # Run one IDEAS command that writes its results under output_name,
+    # with its output appended to output_log.  cmd_items is the
+    # command without the -o option, as a character vector.
+    cmd = add_output_redirect(c(cmd_items, "-o", output_name), output_log);
+    run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
+}
+
+run_ideas_by_window <- function(cmd_prefix, windows_by_chrom, output_base_name, output_log, opt) {
+    # Run IDEAS once per line of windows_by_chrom.  Each line holds a
+    # chromosome name, a window start and a window end separated by
+    # single spaces.  Nothing is run when there are no lines.
+    for (line in windows_by_chrom) {
+        items = strsplit(line, " ")[[1]];
+        chrom = items[1];
+        window_start = items[2];
+        window_end = items[3];
+        output_name = paste(output_base_name, chrom, sep=".");
+        run_ideas(c(cmd_prefix, "-inv", window_start, window_end), output_name, output_log, opt);
+    }
+}
+
+# Perform analysis.
+if (is.null(opt$training_iterations)) {
+    # Not performing training.
+    if (is.null(windows_by_chrom)) {
+        # Not performing windows by chromosome.
+        run_ideas(base_cmd, output_base_name, output_log, opt);
+    } else {
+        # Performing windows by chromosome.
+        run_ideas_by_window(base_cmd, windows_by_chrom, output_base_name, output_log, opt);
+    }
+} else {
+    # Performing training.
+    if (opt$training_iterations < 1) {
+        stop("--training_iterations must be at least 1", call.=FALSE);
+    }
+    output_para0 = paste(output_base_name, "para0", sep=".");
+    output_profile0 = paste(output_base_name, "profile0", sep=".");
+    training_runs = seq_len(opt$training_iterations);
+    for (i in training_runs) {
+        run_ideas(base_cmd, paste(output_base_name, ".tmp.", i, sep=""), output_log, opt);
+    }
+    # Combine the states found by the individual training runs.
+    tpara = combine_state(paste(output_base_name, "tmp", training_runs, "para", sep="."), mycut=0.5);
+    write.table(tpara$profile, output_profile0, quote=FALSE, row.names=FALSE, col.names=FALSE);
+    para = tpara$para;
+    para = apply(para, 1, function(x){paste(x, collapse=" ")});
+    # Reuse the first line of the first training para file as the header.
+    para = c(readLines(paste(output_base_name, "tmp", "1", "para", sep="."), n=1), para);
+    writeLines(para, output_para0);
+    # Now run IDEAS based on the files produced during training.
+    base_cmd = get_post_training_base_cmd(base_cmd, para);
+    base_cmd = paste(base_cmd, "-otherpara", output_para0[[1]], output_profile0[[1]], sep=" ");
+    if (is.null(windows_by_chrom)) {
+        run_ideas(base_cmd, output_base_name, output_log, opt);
+    } else {
+        # Performing windows by chromosome.
+        if (length(windows_by_chrom) == 1) {
+            # NOTE(review): with a single window the output has always been
+            # named after the number of training iterations (the value left
+            # in the training loop index) and no -inv option is passed,
+            # unlike every other windows branch.  Kept as is; confirm
+            # whether the chromosome name and -inv were intended here.
+            output_name = paste(output_base_name, opt$training_iterations, sep=".");
+            run_ideas(base_cmd, output_name, output_log, opt);
+        } else {
+            run_ideas_by_window(base_cmd, windows_by_chrom, output_base_name, output_log, opt);
+        }
+    }
+    # Remove temporary outputs.
+    remove_files(path=".", pattern="tmp");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ideas.xml	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,271 @@
+<tool id="ideas" name="IDEAS" version="1.2.0">
+    <description>accounts for position dependent epigenetic events and detects local cell type relationships</description>
+    <requirements>
+        <requirement type="package" version="2.26.0">bedtools</requirement>
+        <requirement type="package" version="332">ucsc-bedgraphtobigwig</requirement>
+        <requirement type="package" version="332">ucsc-bedsort</requirement>
+        <requirement type="package" version="332">ucsc-bigwigaverageoverbed</requirement>
+        <requirement type="package" version="1.20">ideas</requirement>
+        <requirement type="package" version="1.10.4">r-data.table</requirement>
+        <requirement type="package" version="1.4.4">r-optparse</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+## Builds the shell pipeline for one job: unpack the input's tmp archive,
+## create the output directories, run ideas.R with the options the user set,
+## move the IDEAS result files into the directory the output collections are
+## discovered from and, when requested, render heatmaps with create_heatmaps.R.
+## File-path placeholders are single-quoted so paths with unusual characters
+## reach the shell as a single argument.
+#import os
+#set perform_training = $perform_training_cond.perform_training
+
+## Extract the input's compressed tmp directory archive.
+tar -xzf '$input.metadata.tmp_archive' &&
+
+## Define and create output directories.
+#set output_pdf_dir = "output_pdf_dir"
+#set output_txt_dir = "output_txt_dir"
+#set output_training_dir = "output_training_dir"
+#if str($output_heatmaps) == "yes":
+    mkdir '$output_pdf_dir' &&
+#end if
+#if str($perform_training) == "yes":
+    #set output_dir = $output_training_dir
+    mkdir '$output_training_dir' &&
+#else:
+    #set output_dir = $output_txt_dir
+    mkdir '$output_txt_dir' &&
+#end if
+
+Rscript '$__tool_directory__/ideas.R'
+--burnin_num $burnin_num
+#if str($bychr) == "true":
+    --bychr true
+#end if
+#if str($input.metadata.chrom_bed) not in ['', 'None']:
+    --chrom_bed_input '$input.metadata.chrom_bed'
+#end if
+#if str($input.metadata.chrom_windows) not in ['', 'None']:
+    --chromosome_windows '$input.metadata.chrom_windows'
+#end if
+#if str($hp) == "true":
+    --hp true
+#end if
+#if str($initial_states) != "0":
+    --initial_states $initial_states
+#end if
+--ideas_input_config '$input.metadata.input_config'
+#if str($log2) != "0.0":
+    --log2 $log2
+#end if
+#if str($maxerr) != "0.0":
+    --maxerr $maxerr
+#end if
+#if str($max_cell_type_clusters) != "0":
+    --max_cell_type_clusters $max_cell_type_clusters
+#end if
+#if str($max_position_classes) != "0":
+    --max_position_classes $max_position_classes
+#end if
+#if str($max_states) != "0.0":
+    --max_states $max_states
+#end if
+--mcmc_num $mcmc_num
+#if str($minerr) != "0.0":
+    --minerr $minerr
+#end if
+--output_dir '$output_dir'
+#if str($prior_concentration) != "0.0":
+    --prior_concentration $prior_concentration
+#end if
+--project_name '$project_name'
+#if str($save_ideas_log) == "yes":
+    --save_ideas_log $save_ideas_log
+    --output_log '$output_log'
+#end if
+#if str($standardize_datasets) == "true":
+    --standardize_datasets true
+#end if
+--rseed $rseed
+--thread \${GALAXY_SLOTS:-4}
+#if str($perform_training) == "yes":
+    --training_iterations $perform_training_cond.training_iterations
+    --training_windows $perform_training_cond.training_windows
+#end if
+#if str($perform_training) == "yes":
+    && mv ./*.para0 '$output_dir'
+    && mv ./*.profile0 '$output_dir'
+#else:
+    && mv ./*.para '$output_dir'
+    && mv ./*.profile '$output_dir'
+#end if
+&& mv ./*.cluster '$output_dir'
+&& mv ./*.state '$output_dir'
+#if str($output_heatmaps) == "yes":
+    && Rscript '$__tool_directory__/create_heatmaps.R'
+    --input_dir '$output_dir'
+    --output_dir '$output_pdf_dir'
+    --script_dir '$__tool_directory__'
+    #if str($perform_training) == "yes":
+        --in_training_mode true
+    #end if
+#end if
+    ]]></command>
+    <inputs>
+        <conditional name="perform_training_cond">
+            <param name="perform_training" type="select" label="Perform training?">
+                <option value="yes" selected="true">Yes</option>
+                <option value="no">No</option>
+            </param>
+            <when value="yes">
+                <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/>
+                <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/>
+            </when>
+            <when value="no"/>
+        </conditional>
+        <param name="input" type="data" format="ideaspre" label="Select IDEAS input"/>
+        <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name">
+            <validator type="empty_field"/>
+        </param>
+        <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
+        <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
+        <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each window using">
+            <option value="6" selected="true">mean</option>
+            <option value="8">max</option>
+        </param> <!-- NOTE(review): reads_per_bp is not referenced anywhere in this tool's command template, so the selection appears to have no effect; confirm whether it should be passed to ideas.R or removed. -->
+        <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
+        <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/>
+        <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/>
+        <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/>
+        <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/>
+        <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/>
+        <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/>
+        <param name="standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
+        <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
+        <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
+        <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/>
+        <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/>
+        <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+        <param name="save_ideas_log" type="select" display="radio" label="Save IDEAS log in an additional history item">
+            <option value="no" selected="true">No</option>
+            <option value="yes">Yes</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output_log" format="txt" label="${tool.name} (output log) on ${on_string}">
+            <filter>save_ideas_log == 'yes'</filter>
+        </data>
+        <collection name="output_pdf_collection" type="list" label="${tool.name} (heatmaps) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="output_pdf_dir" format="pdf"/>
+            <filter>output_heatmaps == 'yes'</filter>
+        </collection>
+        <collection name="output_txt_collection" type="list">
+            <discover_datasets pattern="__name__" directory="output_txt_dir" format="txt"/>
+            <filter>perform_training_cond['perform_training'] == 'no'</filter>
+        </collection>
+        <collection name="output_training_collection" type="list">
+            <discover_datasets pattern="__name__" directory="output_training_dir" format="txt"/>
+            <filter>perform_training_cond['perform_training'] == 'yes'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="perform_training" value="yes"/>
+            <param name="training_iterations" value="3"/>
+            <param name="input" value="ideas_test1/input.html" dbkey="hg19" ftype="ideaspre">
+                <!--
+                    The order is critical here - it must be the same as is displayed on the upload form!
+                    Also, there seems to be a bug with the composite upload form tab.  All datasets must
+                    be selected whether they are optional or not.  Here the chromosome_windows.txt file
+                    was generated during a manual execution of ideas_preprocessor tool, specifying chrom
+                    windows.
+                -->
+                <composite_data value='ideas_test1/chromosome_windows.txt'/>
+                <composite_data value='ideas_test1/chromosomes.bed'/>
+                <composite_data value='ideas_test1/IDEAS_input_config.txt'/>
+                <composite_data value='ideas_test1/tmp.tar.gz'/>
+            </param>
+            <param name="output_heatmaps" value="yes"/>
+            <param name="project_name" value="IDEAS_out"/>
+            <output_collection name="output_training_collection" type="list">
+                <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.chr1.cluster" ftype="txt"/>
+                <element name="IDEAS_out.chr2.cluster" file="IDEAS_out.chr2.cluster" ftype="txt"/>
+                <element name="IDEAS_out.chr1.state" file="IDEAS_out.chr1.state" ftype="txt"/>
+                <element name="IDEAS_out.chr2.state" file="IDEAS_out.chr2.state" ftype="txt"/>
+                <element name="IDEAS_out.para0" file="IDEAS_out.para0" ftype="txt"/>
+                <element name="IDEAS_out.profile0" file="IDEAS_out.profile0" ftype="txt" compare="contains"/>
+            </output_collection>
+            <output_collection name="output_pdf_collection" type="list">
+                <element name="IDEAS_out.state.1.pdf" file="IDEAS_out.state.1.pdf" ftype="pdf" compare="contains"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+IDEAS (an **I**\ ntegrative and **D**\ iscriminative **E**\ pigenome **A**\ nnotation **S**\ ystem) identifies
+de novo regulatory functions from epigenetic data in multiple cell types jointly. It is a full probabilistic
+model defined on all data, and it combines signals across both the genome and cell types to boost power. The
+underlying assumption of IDEAS is that, because all cell types share the same underlying DNA sequences,
+**functions of each DNA segment should be correlated**. Also, cell type specific regulation is locus-dependent,
+and so IDEAS uses local epigenetic landscape to **identify de novo and local cell type clusters** without
+assuming or requiring a known global cell type relationship.
+
+The input is a single dataset with the **IdeasPre** datatype, which is produced by the IDEAS Preprocessor tool.
+
+.. image:: $PATH_TO_IMAGES/ideas.png
+
+IDEAS predicts regulatory functions, denoted by epigenetic states, at each position in each cell type by
+**combining information simultaneously learned from other cell types** at the same positions in cell types with
+similar local epigenetic landscapes. Size of genomic intervals for determining the similarity are also learned.
+All of the inferences are done through parallel infinite-state hidden Markov models (iHMM), which is a Bayesian
+non-parametric technique to automatically determine the number of local cell type clusters and the number of
+epigenetic states.
+
+In addition to its improved power, IDEAS has two unique advantages:
+
+ 1) applies **linear time inference** with respect to the number of cell types, which allows it to study hundreds or more cell types jointly
+ 2) uses mini-batch training to **improve reproducibility** of the predicted epigenetic states, which is important because genome segmentation is not convex and hence cannot guarantee a global optimal solution.
+
+-----
+
+**Options**
+
+* **Perform training** - select "Yes" to run the specified number of training iterations, running IDEAS with the parameter values and producing outputs.  After training, these outputs are combined into a single dataset which is then used in conjunction with the inputs for the actual analysis.  This process improves the accuracy of the final results.
+
+ * **Number of training iterations** - the number of times to execute IDEAS with the specified parameter values on the selected inputs to produce the training results.  The minimum number of iterations is 3.
+ * **Number of randomly selected windows for training** - the number of chromosome windows within the input datasets from which to randomly select data for training.
+
+* **Set cell type and epigenetic factor names by** - cell type and epigenetic factor names can be set manually or by extracting them from the names of the selected input datasets.  The latter case requires all selected datasets to have names that contain a "-" character.
+
+ * **BAM or BigWig files** - select one or more BAM or BigWig files from your history, making sure that the name of every selected input includes a "-" character (e.g., e001-h3k4me3.bigwig).
+ * **Cell type, Epigenetic factor and Input** - manually select any number of inputs, setting the cell type and epigenetic factor name for each.  The combination of "cell type name" and "epigenetic factor name" must be unique for each input.  For example, if you have replicate data you may want to specify the cell name as "rep1", "rep2", etc and the factor name as "rep1", "rep2", etc.
+
+  * **Cell type name** - cell type name
+  * **Epigenetic factor name** - epigenetic factor name
+  * **BAM or BigWig file** - BAM or BigWig file
+
+* **Project name** - datasets produced by IDEAS will have this base name.
+* **Seed for IDEAS model initialization** - enter an integer to be used as the seed for the IDEAS model initialization.  A zero value causes IDEAS to automatically generate a random seed, and this seed will be different for each job run.
+* **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
+* **Calculate the signal in each window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each window.
+* **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
+* **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
+* **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number that is representative of the noise level in your data (e.g., a number less than 1). If this number is at a similar scale or larger than the signal in your data, it will lose power.  For example, if your input data is mean read count per window, using 0.1 may produce better results.
+* **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
+* **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
+* **Maximum number of position classes to be inferred** - Set this value only if:
+
+ * you do not want position classes (e.g., for testing purposes), in this case set the value to 1
+ * IDEAS runs slowly because there are too many position classes; generally, fewer than 100 position classes will run fine
+
+* **Maximum number of cell type clusters allowed** - If you set the value to 1, then all cell types will be clustered in one group, which may be desirable if all cell types are homogeneous and you want IDEAS to use information in all cell types equally.
+* **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types).  A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
+* **Number of burnin steps** - specify the number of burnin steps; default is 20.  Increasing the burnin and maximization steps will increase computing and only slightly increase accuracy, while decreasing them will reduce computing resources but may also reduce accuracy.  We recommend running IDEAS with at least 20 burnins and 20 maximizations.  IDEAS will not stop even if it reaches a maximum mode.
+* **Number of maximization steps** - specify the number of maximization steps; default is 20.
+* **Minimum standard deviation for the emission Gaussian distribution** - This number multiplied by the overall standard deviation of your data will be used as a lower bound for the standard deviation for each factor in each epigenetic state (the default is 0.5). This number is useful for removing very subtle clusters in the data.  Setting this value near 0 will allow IDEAS to discover many subtle states, while setting it greater than 1 will result in IDEAS losing the ability to detect meaningful states.
+* **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
+* **Output heatmaps** - select "Yes" to produce an additional dataset collection consisting of PDF datasets, one for each dataset with a .para extension in the primary IDEAS output dataset collection.
+* **Save IDEAS log in an additional history item** - select "Yes" to produce an additional history item that contains the entire IDEAS processing log.
+    </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkw278</citation>
+    </citations>
+</tool>
Binary file static/images/ideas.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.chr1.cluster	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,1 @@
+id0 0:0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.chr1.state	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,6 @@
+#ID CHR POSst POSed e001 PosClass
+0 chr1 0 50000000 0 0
+1 chr1 50000000 100000000 0 1
+2 chr1 100000000 150000000 0 1
+3 chr1 150000000 200000000 0 1
+4 chr1 200000000 249250621 0 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.chr2.cluster	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,1 @@
+id0 0:0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.chr2.state	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,6 @@
+#ID CHR POSst POSed e001 PosClass
+0 chr2 0 50000000 0 0
+1 chr2 50000000 100000000 0 1
+2 chr2 100000000 150000000 0 1
+3 chr2 150000000 200000000 0 1
+4 chr2 200000000 243199373 0 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.para0	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,2 @@
+#count	h3k4me3	h3k4me3*h3k4me3
+60 0 30
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.profile0	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,1 @@
+20 0.999999999876308
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_out.state.1.pdf	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,40 @@
+1 0 obj
+<<
+/Title (R Graphics Output)
+/Creator (R)
+>>
+endobj
+2 0 obj
+endobj
+7 0 obj
+endobj
+8 0 obj
+<<
+>>
+endobj
+3 0 obj
+endobj
+4 0 obj
+/ProcSet [/PDF /Text]
+/Font <</F2 10 0 R >>
+/ExtGState << >>
+/ColorSpace << /sRGB 5 0 R >>
+>>
+endobj
+5 0 obj
+[/ICCBased 6 0 R]
+endobj
+6 0 obj
+endobj
+9 0 obj
+<<
+/Type /Encoding /BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+10 0 obj
+endobj
+xref
+trailer
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ideas_test1/IDEAS_input_config.txt	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,1 @@
+e001 h3k4me3 tmp/e001-h3k4me3.bed.gz
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ideas_test1/IDEAS_out.state.1.pdf	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,14 @@
+1 0 obj
+endobj
+2 0 obj
+7 0 obj
+8 0 obj
+3 0 obj
+4 0 obj
+5 0 obj
+6 0 obj
+9 0 obj
+10 0 obj
+startxref
+3902
+%%EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ideas_test1/chromosome_windows.txt	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,2 @@
+chr1 0 5
+chr2 5 10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ideas_test1/chromosomes.bed	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,10 @@
+chr1	0	50000000
+chr1	50000000	100000000
+chr1	100000000	150000000
+chr1	150000000	200000000
+chr1	200000000	249250621
+chr2	0	50000000
+chr2	50000000	100000000
+chr2	100000000	150000000
+chr2	150000000	200000000
+chr2	200000000	243199373
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ideas_test1/input.html	Mon Feb 12 09:52:26 2018 -0500
@@ -0,0 +1,8 @@
+<html><head></head><body>
+<h3>Files prepared for IDEAS</h3>
+<ul>
+<li><a href="chromosomes.bed">chromosomes.bed</a></li>
+<li><a href="chromosome_windows.txt">chromosome_windows.txt</a></li>
+<li><a href="IDEAS_input_config.txt">IDEAS_input_config.txt</a></li>
+<li><a href="tmp.tar.gz">tmp.tar.gz</a></li>
+</ul></body></html>
Binary file test-data/ideas_test1/tmp.tar.gz has changed