annotate ideas.R @ 156:7efdaa2e0bf1 draft

Uploaded
author greg
date Fri, 12 Jan 2018 13:59:12 -0500
parents b140097a984e
children 7d33a8157c6b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
3762c27d820a Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env Rscript
3762c27d820a Uploaded
greg
parents:
diff changeset
2
3762c27d820a Uploaded
greg
parents:
diff changeset
3 suppressPackageStartupMessages(library("data.table"))
3762c27d820a Uploaded
greg
parents:
diff changeset
4 suppressPackageStartupMessages(library("optparse"))
3762c27d820a Uploaded
greg
parents:
diff changeset
5
3762c27d820a Uploaded
greg
parents:
diff changeset
# Command-line options accepted by this script. Options declared with
# default=NULL are optional; the code further down tests them with is.null()
# before adding the matching IDEAS flag.
option_list <- list(
  make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
  make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
  make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
  make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
  make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
  make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
  make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
  make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"),
  make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
  make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
  make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
  make_option(c("--norm"), action="store_true", dest="norm", default=FALSE, help="Standardize all datasets"),
  make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
  make_option(c("--prep_output_config"), action="store", dest="prep_output_config", help="prepMat output config file"),
  make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
  make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
  make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
  make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
  make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"),
  make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
  make_option(c("--tmp_dir"), action="store", dest="tmp_dir", help="Directory of bed files"),
  make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
  # The help text used to repeat "Number of training iterations" (copy-paste).
  make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training windows"),
  make_option(c("--windows_bed"), action="store", dest="windows_bed", default=NULL, help="Bed file containing bed windows"),
  make_option(c("--window_end"), action="store", dest="window_end", type="integer", default=NULL, help="Windows positions by chromosome end value"),
  make_option(c("--window_start"), action="store", dest="window_start", type="integer", default=NULL, help="Windows positions by chromosome start value")
)

# Parse the command line; `opt` is the named list of option values that the
# rest of the script reads.
parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
args <- parse_args(parser, positional_arguments=TRUE)
opt <- args$options
3762c27d820a Uploaded
greg
parents:
diff changeset
38
3762c27d820a Uploaded
greg
parents:
diff changeset
# Append a shell redirection to `cmd` so that both stdout and stderr of the
# command are appended to a log file.
#
# cmd              Command line (single string) to extend.
# save_ideas_log   When NULL the output goes to `default_log_name`; otherwise
#                  it goes to `output_log`.
# output_log       Log path used when `save_ideas_log` is not NULL.
# default_log_name Log path used when `save_ideas_log` is NULL.
#
# Returns the command string with the redirection appended.
add_output_redirect <- function(cmd, save_ideas_log, output_log, default_log_name) {
  log_path <- if (is.null(save_ideas_log)) default_log_name else output_log
  # system() hands the command to /bin/sh. The bash-only "&>>" operator is
  # read by a plain POSIX shell as "run in background" followed by an empty
  # ">>" redirection, so use the portable ">> file 2>&1" spelling instead.
  paste(cmd, ">>", log_path, "2>&1", sep = " ")
}
3762c27d820a Uploaded
greg
parents:
diff changeset
47
3762c27d820a Uploaded
greg
parents:
diff changeset
# Merge the state tables written by several IDEAS runs into one parameter
# table and one profile table.
#
# parafiles  Character vector of ".para" file paths, one per run. For each
#            one a sibling ".profile" file is also read.
# method     Agglomeration method handed to hclust().
# mycut      Fraction of runs; it is multiplied by the number of files and
#            rounded to give the minimum number of distinct runs a cluster
#            must draw rows from to be kept.
# pcut       Combined states whose mismatch fraction `pp` exceeds this value
#            are removed (with the default of 1.0 nothing is removed, since
#            `pp` is a fraction).
#
# Returns a list with `para` (one row per retained state, columns named after
# the header of the first para file) and `profile` (one row per retained
# profile cluster).
#
# NOTE(review): judging from the arithmetic, each para row looks like
# (count, p per-mark sums, p*(p+1)/2 packed cross-product sums) - confirm
# against the IDEAS file format.
combine_state <- function(parafiles, method = "ward.D", mycut = 0.9, pcut = 1.0) {
  X <- NULL
  K <- NULL
  I <- NULL
  myheader <- NULL
  p <- NULL
  for (i in seq_along(parafiles)) {
    x <- fread(parafiles[i])
    # Keep columns up to the last one that is not NA in the first row.
    t <- max(which(!is.na(x[1, ])))
    x <- as.matrix(x[, 1:t])
    if (i == 1) {
      myheader <- colnames(x)
      # Solve length(myheader) = 1 + p + p * (p + 1) / 2 for p.
      p <- sqrt(9 / 4 - 2 * (1 - length(myheader))) - 3 / 2
    }
    # Note: 1:p + 1 is (1:p) + 1, i.e. columns 2 .. p + 1.
    m <- match(myheader[1:p + 1], colnames(x)[1:p + 1])
    # Positions of the packed lower-triangular columns after reordering the
    # first p value columns to the order used by the first file.
    v <- NULL
    for (ii in 1:p) {
      for (jj in 1:ii) {
        a <- max(m[ii], m[jj])
        b <- min(m[ii], m[jj])
        v <- c(v, a * (a + 1) / 2 + b - a)
      }
    }
    X <- rbind(X, array(as.matrix(x[, c(1, 1 + m, 1 + p + v)]), dim = c(length(x) / (1 + p + length(v)), 1 + p + length(v))))
    K <- c(K, dim(x)[1])          # rows contributed by each file
    I <- c(I, rep(i, dim(x)[1]))  # file index of every row of X
  }
  N <- length(parafiles)
  p <- (sqrt(1 + dim(X)[2] * 8) - 3) / 2
  omycut <- mycut  # kept from the original; not read again in this function
  mycut <- round(length(parafiles) * mycut)
  # M: columns 2 .. p + 1 divided by column 1, one row per row of X.
  M <- array(X[, 1:p + 1] / X[, 1], dim = c(dim(X)[1], p))
  # V: a stack of p x p blocks, one per row of X.
  V <- array(0, dim = c(dim(X)[1] * p, p))
  for (i in seq_len(dim(X)[1])) {
    t <- (i - 1) * p
    l <- 1
    for (j in 1:p) {
      for (k in 1:j) {
        V[t + j, k] <- V[t + k, j] <- X[i, 1 + p + l] / X[i, 1] - M[i, j] * M[i, k]
        l <- l + 1
      }
    }
    # Replace the block by the transposed inverse of its Cholesky factor,
    # after adding 0.1 to the diagonal.
    V[t + 1:p, ] <- t(solve(chol(V[t + 1:p, ] + diag(1e-1, p))))
  }
  # D: symmetric pairwise distance between rows of X, using both rows' blocks.
  D <- array(0, dim = rep(dim(X)[1], 2))
  for (i in 2:dim(X)[1]) {
    for (j in 1:(i - 1)) {
      D[i, j] <- D[j, i] <- sqrt((sum((V[(i - 1) * p + 1:p, ] %*% (M[i, ] - M[j, ]))^2) + sum((V[(j - 1) * p + 1:p, ] %*% (M[i, ] - M[j, ]))^2)))
    }
  }
  # Leave-one-file-out pass: cluster the rows of all other files and record,
  # per left-out file, the best count of clusters that span >= mycut files.
  MM <- NULL
  kk <- NULL
  for (i in 1:N) {
    t <- 1:K[i]
    if (i > 1) {
      t <- t + sum(K[1:(i - 1)])
    }
    t <- (1:dim(D)[1])[-t]
    h <- hclust(as.dist(D[t, t]), method = method)
    k <- -1
    tM <- NULL
    for (j in min(K):(min(length(t), max(K) * 2))) {
      m <- cutree(h, k = j)
      tt <- NULL
      for (l in 1:j) {
        tt[l] <- length(unique(I[t[which(m == l)]]))
      }
      tk <- length(which(tt >= mycut))
      if (tk > k) {
        k <- tk
        tM <- make_parameter(1:j, I[t], m, mycut, X[t, ])
      } else if (tk < k) {
        break
      }
    }
    kk[i] <- k
    MM <- rbind(MM, cbind(i, tM))
  }
  # Full pass over all rows, stopping once the count of qualifying clusters
  # reaches the median of the leave-one-out counts or starts to fall.
  mysel <- median(kk)
  h <- hclust(as.dist(D), method = method)
  rt <- rep(0, max(K) * 2)  # overwritten below before it is ever read
  k <- -1
  for (i in min(K):min(dim(D)[1], max(K) * 2)) {
    m <- cutree(h, k = i)
    tt <- NULL
    for (j in 1:i) {
      tt[j] <- length(unique(I[which(m == j)]))
    }
    tk <- length(which(tt >= mycut))
    # Scalar condition, so use the short-circuit operator.
    if (tk == mysel || tk < k) {
      break
    }
    k <- tk
    rt[i] <- length(which(tt >= mycut))
  }
  mysel <- max(k, tk)
  m <- cutree(h, k = mysel)
  nn <- NULL
  for (i in 1:mysel) {
    t <- which(m == i)
    nn[i] <- sum(X[t, 1])
  }
  # Clusters ordered by decreasing sum of column 1.
  oo <- order(nn, decreasing = TRUE)
  rt <- make_parameter(oo, I, m, mycut, X)
  onstate <- max(rt[, 1]) + 1
  ooo <- NULL
  for (i in oo) {
    t <- which(m == i)
    if (length(unique(I[t])) >= mycut) {
      ooo <- c(ooo, i)
    }
  }
  d <- NULL
  for (i in 1:N) {
    # drop = FALSE keeps a matrix even when only one row belongs to run i;
    # compare_two()/get_mean() index their argument as a matrix.
    d <- rbind(d, compare_two(rt, MM[MM[, 1] == i, -1, drop = FALSE])[1:onstate])
  }
  dd <- array(cutree(hclust(dist(c(d))), k = 2), dim = dim(d))
  kk <- table(c(dd))
  kk <- which(as.integer(kk) == max(as.integer(kk)))[1]
  pp <- apply(dd, 2, function(x) { length(which(x != kk)) / length(x) })
  pp0 <- apply(d, 2, function(x) { length(which(x > 0.5)) / length(x) })
  pp[pp0 < pp] <- pp0[pp0 < pp]
  t <- which(pp > pcut)
  if (length(t) > 0) {
    # Drop the flagged states and renumber the remaining ones from 0.
    j <- 0
    nrt <- NULL
    # Note: 1:onstate - 1 is (1:onstate) - 1, i.e. labels 0 .. onstate - 1.
    for (i in (1:onstate - 1)[-t]) {
      # drop = FALSE: a single-row state must stay a row, not become a column.
      nrt <- rbind(nrt, cbind(j, rt[rt[, 1] == i, -1, drop = FALSE]))
      j <- j + 1
    }
    rt <- nrt
    ooo <- ooo[-t]
  }
  # Collapse to one row per state by summing its member rows, dropping the
  # label column.
  nrt <- NULL
  for (i in 0:max(rt[, 1])) {
    t <- which(rt[, 1] == i)
    nrt <- rbind(nrt, apply(array(rt[t, ], dim = c(length(t), dim(rt)[2])), 2, sum)[-1])
  }
  rt <- nrt
  colnames(rt) <- myheader
  O <- NULL
  Ip <- NULL
  Xp <- NULL
  k <- 0
  for (i in seq_along(parafiles)) {
    # Derive the profile path from the para path. The previous
    # gsub(".para", ...) treated "." as a regex wildcard and rewrote every
    # match, corrupting paths such as "compara.tmp.1.para". Replace the
    # ".para" suffix; only fall back to the old pattern when the path does
    # not end in ".para".
    str <- sub("\\.para$", ".profile", parafiles[i])
    if (identical(str, parafiles[i])) {
      str <- gsub(".para", ".profile", parafiles[i])
    }
    p <- as.matrix(read.table(str))
    u <- array(0, dim = c(dim(p)[1], length(ooo)))
    for (j in seq_along(ooo)) {
      # Rows of this file (offset k in X) that fell into retained cluster j.
      t <- which(m[k + 1:K[i]] == ooo[j])
      u[, j] <- apply(array(p[, 1 + t], dim = c(dim(p)[1], length(t))), 1, sum)
    }
    k <- k + K[i]
    u <- u / (apply(u, 1, sum) + 1e-10)  # row-normalise; epsilon avoids 0/0
    Xp <- rbind(Xp, cbind(p[, 1], u))
    Ip <- c(Ip, rep(i, dim(u)[1]))
  }
  hp <- hclust(dist(((Xp[, -1] + min(1e-3, min(Xp[, -1][Xp[, -1] > 0]))))), method = method)
  ocut <- min(mycut / 2, length(parafiles) / 2)
  t <- range(as.integer(table(Ip)))
  Kp <- NULL
  for (i in t[1]:(t[2] * 2)) {
    m <- cutree(hp, k = i)
    tt <- table(Ip, m)
    ll <- apply(tt, 2, function(x) { length(which(x > 0)) })
    Kp <- c(Kp, length(which(ll >= ocut)))
  }
  # First cluster count that maximises the number of clusters seen in at
  # least ocut files.
  oN <- (t[1]:(t[2] * 2))[which(Kp == max(Kp))[1]]
  m <- cutree(hp, k = oN)
  tt <- table(Ip, m)
  ll <- apply(tt, 2, function(x) { length(which(x > 0)) })
  tt <- which(ll >= ocut)
  for (i in tt) {
    t <- which(m == i)
    # Row = (sum of column 1, column-1-weighted average of the other columns).
    O <- rbind(O, c(sum(Xp[t, 1]), apply(array(Xp[t, -1] * Xp[t, 1], dim = c(length(t), dim(Xp)[2] - 1)), 2, sum) / sum(Xp[t, 1])))
  }
  nrt <- NULL
  nrt$para <- rt
  nrt$profile <- O
  return(nrt)
}
3762c27d820a Uploaded
greg
parents:
diff changeset
229
3762c27d820a Uploaded
greg
parents:
diff changeset
# For two labelled parameter tables, return the distance from each state mean
# of one table to the closest state mean of the other.
#
# n, m  Matrices accepted by get_mean() (first column is a state label).
#
# Returns a numeric vector: first one entry per state of `n` (distance to the
# nearest state of `m`), followed by one entry per state of `m` (distance to
# the nearest state of `n`). Distances are Euclidean over the first p columns
# of the get_mean() output, with p derived from the column count of `n`.
compare_two <- function(n, m) {
  means_n <- get_mean(n)
  means_m <- get_mean(m)
  n_dims <- (-3 + sqrt(9 + 8 * (dim(n)[2] - 2))) / 2
  dims <- 1:n_dims

  # Smallest Euclidean distance between `point` and any row of `candidates`.
  nearest <- function(point, candidates) {
    rows <- array(candidates[, dims], dim = c(dim(candidates)[1], n_dims))
    min(apply(rows, 1, function(row) {
      sqrt(sum((row - point)^2))
    }))
  }

  distances <- NULL
  offset <- nrow(means_n)
  for (r in 1:nrow(means_n)) {
    distances[r] <- nearest(means_n[r, dims], means_m)
  }
  for (r in 1:nrow(means_m)) {
    distances[offset + r] <- nearest(means_m[r, dims], means_n)
  }
  return(distances)
}
3762c27d820a Uploaded
greg
parents:
diff changeset
243
3762c27d820a Uploaded
greg
parents:
diff changeset
# Collapse a labelled parameter table to one row per label and normalise it.
#
# n  Numeric matrix whose first column is a state label. Rows sharing a label
#    are summed column-wise; the label column is then dropped.
#
# Returns a matrix with one row per distinct label (in increasing label
# order) and ncol(n) - 2 columns: every summed column after the first,
# divided by the first summed column.
get_mean <- function(n) {
  n_cols <- dim(n)[2]
  labels <- sort(unique(n[, 1]))
  per_label <- lapply(labels, function(label) {
    rows <- which(n[, 1] == label)
    # array() keeps a single matching row two-dimensional.
    block <- array(n[rows, ], dim = c(length(rows), n_cols))
    apply(block, 2, sum)[-1]
  })
  totals <- do.call(rbind, per_label)
  ratios <- totals[, -1] / totals[, 1]
  array(ratios, dim = c(length(ratios) / (n_cols - 2), n_cols - 2))
}
3762c27d820a Uploaded
greg
parents:
diff changeset
253
3762c27d820a Uploaded
greg
parents:
diff changeset
# Collect the parameter rows of every cluster that is supported by enough
# distinct sources, tagging each kept cluster with a running label.
#
# myorder  Cluster ids to visit, in the order labels should be handed out.
# id       Source identifier of each row of `para`.
# mem      Cluster membership of each row of `para`.
# mycut    Minimum number of distinct `id` values a cluster needs to be kept.
# para     Matrix of parameter rows.
#
# Returns a matrix whose first column (named "j") is the 0-based label of the
# kept cluster and whose remaining columns are the matching rows of `para`;
# NULL when no cluster qualifies.
make_parameter <- function(myorder, id, mem, mycut, para) {
  kept <- NULL
  next_label <- 0
  for (cluster in myorder) {
    members <- which(mem == cluster)
    if (length(unique(id[members])) < mycut) {
      next
    }
    # array() keeps a single-row selection two-dimensional.
    rows <- array(para[members, ], dim = c(length(members), dim(para)[2]))
    kept <- rbind(kept, cbind(j = next_label, rows))
    next_label <- next_label + 1
  }
  return(kept)
}
3762c27d820a Uploaded
greg
parents:
diff changeset
266
3762c27d820a Uploaded
greg
parents:
diff changeset
# Echo a shell command, run it, and stop the script if the command fails.
#
# cmd              Command line passed to system().
# save_ideas_log   When NULL, a failure moves `default_log_name` to
#                  `output_log` before exiting.
# output_log       Destination for the log on failure (may be NULL).
# default_log_name Log file the command was redirected to by default.
#
# Returns NULL invisibly when the command exits with status 0. On a non-zero
# status the R session is terminated with that same status.
run_cmd <- function(cmd, save_ideas_log, output_log, default_log_name) {
  cat("\n\n >>>>> cmd:\n", cmd, "\n\n")
  rc <- system(cmd)
  if (rc != 0) {
    # Only attempt the move when there is both a source and a destination;
    # file.rename() would otherwise fail before the script can exit.
    if (is.null(save_ideas_log) && !is.null(output_log) && file.exists(default_log_name)) {
      file.rename(default_log_name, output_log)
    }
    # quit()'s first argument is `save`, not `status`: quit(rc) raised an
    # "unrecognized save value" style error instead of exiting with rc.
    quit(save = "no", status = rc)
  }
  invisible(NULL)
}
3762c27d820a Uploaded
greg
parents:
diff changeset
277
3762c27d820a Uploaded
greg
parents:
diff changeset
# Log file used when the caller did not ask for the IDEAS log to be saved,
# and the base name shared by the IDEAS outputs.
default_log_name <- "ideas_log.txt"
output_base_name <- opt$project_name

# Build the IDEAS command line as a single space-separated string. Optional
# arguments are added only when the corresponding option was supplied; the
# -sample, -rseed and -thread arguments are always appended.
cmd <- local({
  # Append "<flag> <value>" for each named option that is not NULL.
  # `flags` maps an IDEAS flag (name) to the option holding its value.
  append_valued <- function(line, flags) {
    for (flag in names(flags)) {
      value <- opt[[flags[[flag]]]]
      if (!is.null(value)) {
        line <- paste(line, flag, value, sep = " ")
      }
    }
    line
  }

  line <- paste("ideas", opt$prep_output_config, sep = " ")
  if (!is.null(opt$windows_bed)) {
    line <- paste(line, opt$windows_bed, sep = " ")
  }
  # Training runs are started with imputation switched off.
  if (!is.null(opt$training_iterations)) {
    line <- paste(line, "-impute none", sep = " ")
  }
  # Boolean switches: the IDEAS flag is the option name with a leading dash.
  for (switch_name in c("bychr", "hp", "norm")) {
    if (opt[[switch_name]]) {
      line <- paste(line, paste("-", switch_name, sep = ""), sep = " ")
    }
  }
  # The window interval is only passed when both ends are given.
  if (!is.null(opt$window_start) && !is.null(opt$window_end)) {
    line <- paste(line, "-inv", opt$window_start, opt$window_end, sep = " ")
  }
  line <- append_valued(line, c(
    "-log2" = "log2",
    "-G" = "max_states",
    "-C" = "initial_states",
    "-P" = "max_position_classes",
    "-K" = "max_cell_type_clusters",
    "-A" = "prior_concentration"
  ))
  line <- paste(line, "-sample", opt$burnin_num, opt$mcmc_num, sep = " ")
  line <- append_valued(line, c("-minerr" = "minerr", "-maxerr" = "maxerr"))
  line <- paste(line, "-rseed", opt$rseed, sep = " ")
  paste(line, "-thread", opt$thread, sep = " ")
})
3762c27d820a Uploaded
greg
parents:
diff changeset
326
3762c27d820a Uploaded
greg
parents:
diff changeset
if (is.null(opt$training_iterations)) {
  # No training requested: run IDEAS once, writing to the project base name.
  final_cmd <- paste(cmd, "-o", output_base_name, sep = " ")
  final_cmd <- add_output_redirect(final_cmd, opt$save_ideas_log, opt$output_log, default_log_name)
  run_cmd(final_cmd, opt$save_ideas_log, opt$output_log, default_log_name)
} else {
  # Training: run IDEAS once per iteration into "<base>.tmp.<i>", combine the
  # resulting state tables, and write them out as "<base>.para0" and
  # "<base>.profile0".
  output_para0 <- paste(output_base_name, ".para0", sep = "")
  output_profile0 <- paste(output_base_name, ".profile0", sep = "")
  for (i in seq_len(opt$training_iterations)) {
    final_cmd <- paste(cmd, "-o", paste(output_base_name, ".tmp.", i, sep = ""), sep = " ")
    final_cmd <- add_output_redirect(final_cmd, opt$save_ideas_log, opt$output_log, default_log_name)
    run_cmd(final_cmd, opt$save_ideas_log, opt$output_log, default_log_name)
  }
  tpara <- combine_state(paste(output_base_name, ".tmp.", seq_len(opt$training_iterations), ".para", sep = ""), mycut = 0.5)
  para <- tpara$para
  write.table(tpara$profile, output_profile0, quote = FALSE, row.names = FALSE, col.names = FALSE)
  # One text line per combined state, preceded by the header line copied from
  # the first training run's para file.
  para <- apply(para, 1, function(x) { paste(x, collapse = " ") })
  para <- c(readLines(paste(output_base_name, ".tmp.1.para", sep = ""), n = 1), para)
  writeLines(para, output_para0)

  # `cmd` is one space-joined string at this point, so comparing it with
  # "-G" or "-C" could never match: a user-supplied "-G <n>" stayed in place
  # next to the "-G" appended here, and "-C <n>" was never removed. Split the
  # string into tokens to drop both flags together with their values, then
  # append "-G" with the number of combined states.
  tokens <- strsplit(cmd, " ", fixed = TRUE)[[1]]
  stale_at <- which(tokens %in% c("-G", "-C"))
  if (length(stale_at) > 0) {
    tokens <- tokens[-c(stale_at, stale_at + 1)]
  }
  # As before, `cmd` leaves this branch as a character vector: the base
  # command string followed by the extra arguments as separate elements.
  cmd <- c(paste(tokens, collapse = " "), "-otherpara", output_para0, output_profile0, "-G", length(para) - 1)
}