| 80 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 from galaxy import eggs | 
|  | 4 import sys, string | 
|  | 5 #from rpy import * | 
|  | 6 import rpy2.robjects as robjects | 
|  | 7 import rpy2.rlike.container as rlc | 
|  | 8 from rpy2.robjects.packages import importr | 
|  | 9 r = robjects.r | 
|  | 10 grdevices = importr('grDevices') | 
|  | 11 import numpy | 
|  | 12 | 
|  | 13 | 
|  | 14 def stop_err(msg): | 
|  | 15     sys.stderr.write(msg) | 
|  | 16     sys.exit() | 
|  | 17 | 
|  | 18 infile = sys.argv[1] | 
|  | 19 x_cols = sys.argv[2].split(',') | 
|  | 20 y_cols = sys.argv[3].split(',') | 
|  | 21 | 
|  | 22 x_scale = x_center = "FALSE" | 
|  | 23 if sys.argv[4] == 'both': | 
|  | 24     x_scale = x_center = "TRUE" | 
|  | 25 elif sys.argv[4] == 'center': | 
|  | 26     x_center = "TRUE" | 
|  | 27 elif sys.argv[4] == 'scale': | 
|  | 28     x_scale = "TRUE" | 
|  | 29 | 
|  | 30 y_scale = y_center = "FALSE" | 
|  | 31 if sys.argv[5] == 'both': | 
|  | 32     y_scale = y_center = "TRUE" | 
|  | 33 elif sys.argv[5] == 'center': | 
|  | 34     y_center = "TRUE" | 
|  | 35 elif sys.argv[5] == 'scale': | 
|  | 36     y_scale = "TRUE" | 
|  | 37 | 
|  | 38 std_scores = "FALSE" | 
|  | 39 if sys.argv[6] == "yes": | 
|  | 40     std_scores = "TRUE" | 
|  | 41 | 
|  | 42 outfile = sys.argv[7] | 
|  | 43 outfile2 = sys.argv[8] | 
|  | 44 | 
|  | 45 fout = open(outfile,'w') | 
|  | 46 elems = [] | 
|  | 47 for i, line in enumerate( file ( infile )): | 
|  | 48     line = line.rstrip('\r\n') | 
|  | 49     if len( line )>0 and not line.startswith( '#' ): | 
|  | 50         elems = line.split( '\t' ) | 
|  | 51         break | 
|  | 52     if i == 30: | 
|  | 53         break # Hopefully we'll never get here... | 
|  | 54 | 
|  | 55 if len( elems )<1: | 
|  | 56     stop_err( "The data in your input dataset is either missing or not formatted properly." ) | 
|  | 57 | 
|  | 58 x_vals = [] | 
|  | 59 | 
|  | 60 for k,col in enumerate(x_cols): | 
|  | 61     x_cols[k] = int(col)-1 | 
|  | 62     #x_vals.append([]) | 
|  | 63 | 
|  | 64 y_vals = [] | 
|  | 65 | 
|  | 66 for k,col in enumerate(y_cols): | 
|  | 67     y_cols[k] = int(col)-1 | 
|  | 68     #y_vals.append([]) | 
|  | 69 | 
|  | 70 skipped = 0 | 
|  | 71 for ind,line in enumerate( file( infile )): | 
|  | 72     if line and not line.startswith( '#' ): | 
|  | 73         try: | 
|  | 74             fields = line.strip().split("\t") | 
|  | 75             valid_line = True | 
|  | 76             for col in x_cols+y_cols: | 
|  | 77                 try: | 
|  | 78                     assert float(fields[col]) | 
|  | 79                 except: | 
|  | 80                     skipped += 1 | 
|  | 81                     valid_line = False | 
|  | 82                     break | 
|  | 83             if valid_line: | 
|  | 84                 for k,col in enumerate(x_cols): | 
|  | 85                     try: | 
|  | 86                         xval = float(fields[col]) | 
|  | 87                     except: | 
|  | 88                         xval = NaN# | 
|  | 89                     #x_vals[k].append(xval) | 
|  | 90                     x_vals.append(xval) | 
|  | 91                 for k,col in enumerate(y_cols): | 
|  | 92                     try: | 
|  | 93                         yval = float(fields[col]) | 
|  | 94                     except: | 
|  | 95                         yval = NaN# | 
|  | 96                     #y_vals[k].append(yval) | 
|  | 97                     y_vals.append(yval) | 
|  | 98         except: | 
|  | 99             skipped += 1 | 
|  | 100 | 
|  | 101 #x_vals1 = numpy.asarray(x_vals).transpose() | 
|  | 102 #y_vals1 = numpy.asarray(y_vals).transpose() | 
|  | 103 | 
|  | 104 #x_dat= r.list(array(x_vals1)) | 
|  | 105 #y_dat= r.list(array(y_vals1)) | 
|  | 106 | 
|  | 107 x_dat = r['matrix'](robjects.FloatVector(x_vals),ncol=len(x_cols),byrow=True) | 
|  | 108 y_dat = r['matrix'](robjects.FloatVector(y_vals),ncol=len(y_cols),byrow=True) | 
|  | 109 | 
|  | 110 try: | 
|  | 111     r.suppressWarnings(r.library("yacca")) | 
|  | 112 except: | 
|  | 113     stop_err("Missing R library yacca.") | 
|  | 114 | 
|  | 115 #set_default_mode(NO_CONVERSION) | 
|  | 116 try: | 
|  | 117     xcolnames = ["c%d" %(el+1) for el in x_cols] | 
|  | 118     ycolnames = ["c%d" %(el+1) for el in y_cols] | 
|  | 119     #cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores)) | 
|  | 120     cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), **{'standardize.scores':r(std_scores)}) | 
|  | 121     #ftest = r.F_test_cca(cc) | 
|  | 122     ftest = r['F.test.cca'](cc) | 
|  | 123 except RException, rex: | 
|  | 124     stop_err("Encountered error while performing CCA on the input data: %s" %(rex)) | 
|  | 125 | 
|  | 126 #set_default_mode(BASIC_CONVERSION) | 
|  | 127 summary = r.summary(cc) | 
|  | 128 | 
|  | 129 #ncomps = len(summary['corr']) | 
|  | 130 ncomps = len(summary.rx2('corr')) | 
|  | 131 #comps = summary['corr'].keys() | 
|  | 132 #comps = summary.rx2('corr').names | 
|  | 133 comps = (','.join(summary.rx2('corr').names)).split(',') | 
|  | 134 #corr = summary['corr'].values() | 
|  | 135 corr = summary.rx2('corr') | 
|  | 136 #xlab = summary['xlab'] | 
|  | 137 xlab = summary.rx2('xlab') | 
|  | 138 #ylab = summary['ylab'] | 
|  | 139 ylab = summary.rx2('ylab') | 
|  | 140 | 
|  | 141 for i in range(ncomps): | 
|  | 142     corr[comps.index('CV %s' %(i+1))] = summary.rx2('corr')[i] | 
|  | 143     #corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i] | 
|  | 144 | 
|  | 145 #ftest=ftest.as_py() | 
|  | 146 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 147 print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr])) | 
|  | 148 #print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']])) | 
|  | 149 print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('statistic')])) | 
|  | 150 #print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']])) | 
|  | 151 print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('p.value')])) | 
|  | 152 | 
|  | 153 | 
|  | 154 print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 155 #for i,val in enumerate(summary['xcoef']): | 
|  | 156 #    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 157 vm = summary.rx2('xcoef') | 
|  | 158 for i in range(vm.nrow): | 
|  | 159     vals = [] | 
|  | 160     for j in range(vm.ncol): | 
|  | 161        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 162     print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals)) | 
|  | 163 | 
|  | 164 print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 165 #for i,val in enumerate(summary['ycoef']): | 
|  | 166 #    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 167 vm = summary.rx2('ycoef') | 
|  | 168 for i in range(vm.nrow): | 
|  | 169     vals = [] | 
|  | 170     for j in range(vm.ncol): | 
|  | 171        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 172     print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals)) | 
|  | 173 | 
|  | 174 print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 175 #for i,val in enumerate(summary['xstructcorr']): | 
|  | 176 #    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 177 vm = summary.rx2('xstructcorr') | 
|  | 178 for i in range(vm.nrow): | 
|  | 179     vals = [] | 
|  | 180     for j in range(vm.ncol): | 
|  | 181        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 182     print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals)) | 
|  | 183 | 
|  | 184 print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 185 #for i,val in enumerate(summary['ystructcorr']): | 
|  | 186 #    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 187 vm = summary.rx2('ystructcorr') | 
|  | 188 for i in range(vm.nrow): | 
|  | 189     vals = [] | 
|  | 190     for j in range(vm.ncol): | 
|  | 191        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 192     print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals)) | 
|  | 193 | 
|  | 194 print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 195 #for i,val in enumerate(summary['xcrosscorr']): | 
|  | 196 #    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 197 vm = summary.rx2('xcrosscorr') | 
|  | 198 for i in range(vm.nrow): | 
|  | 199     vals = [] | 
|  | 200     for j in range(vm.ncol): | 
|  | 201        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 202     print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals)) | 
|  | 203 | 
|  | 204 print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 205 #for i,val in enumerate(summary['ycrosscorr']): | 
|  | 206 #    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 207 vm = summary.rx2('ycrosscorr') | 
|  | 208 for i in range(vm.nrow): | 
|  | 209     vals = [] | 
|  | 210     for j in range(vm.ncol): | 
|  | 211        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 212     print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals)) | 
|  | 213 | 
|  | 214 r.pdf( outfile2, 8, 8 ) | 
|  | 215 #r.plot(cc) | 
|  | 216 for i in range(ncomps): | 
|  | 217     r['helio.plot'](cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance") | 
|  | 218 #r.dev_off() | 
|  | 219 grdevices.dev_off() | 
|  | 220 |