| 80 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 from galaxy import eggs | 
|  | 4 import sys, string | 
|  | 5 #from rpy import * | 
|  | 6 import rpy2.robjects as robjects | 
|  | 7 import rpy2.rlike.container as rlc | 
|  | 8 from rpy2.robjects.packages import importr | 
|  | 9 r = robjects.r | 
|  | 10 grdevices = importr('grDevices') | 
|  | 11 import numpy | 
|  | 12 | 
|  | 13 def stop_err(msg): | 
|  | 14     sys.stderr.write(msg) | 
|  | 15     sys.exit() | 
|  | 16 | 
|  | 17 infile = sys.argv[1] | 
|  | 18 x_cols = sys.argv[2].split(',') | 
|  | 19 method = sys.argv[3] | 
|  | 20 outfile = sys.argv[4] | 
|  | 21 outfile2 = sys.argv[5] | 
|  | 22 | 
|  | 23 if method == 'svd': | 
|  | 24     scale = center = "FALSE" | 
|  | 25     if sys.argv[6] == 'both': | 
|  | 26         scale = center = "TRUE" | 
|  | 27     elif sys.argv[6] == 'center': | 
|  | 28         center = "TRUE" | 
|  | 29     elif sys.argv[6] == 'scale': | 
|  | 30         scale = "TRUE" | 
|  | 31 | 
|  | 32 fout = open(outfile,'w') | 
|  | 33 elems = [] | 
|  | 34 for i, line in enumerate( file ( infile )): | 
|  | 35     line = line.rstrip('\r\n') | 
|  | 36     if len( line )>0 and not line.startswith( '#' ): | 
|  | 37         elems = line.split( '\t' ) | 
|  | 38         break | 
|  | 39     if i == 30: | 
|  | 40         break # Hopefully we'll never get here... | 
|  | 41 | 
|  | 42 if len( elems )<1: | 
|  | 43     stop_err( "The data in your input dataset is either missing or not formatted properly." ) | 
|  | 44 | 
|  | 45 x_vals = [] | 
|  | 46 | 
|  | 47 for k,col in enumerate(x_cols): | 
|  | 48     x_cols[k] = int(col)-1 | 
|  | 49     # x_vals.append([]) | 
|  | 50 | 
|  | 51 NA = 'NA' | 
|  | 52 skipped = 0 | 
|  | 53 for ind,line in enumerate( file( infile )): | 
|  | 54     if line and not line.startswith( '#' ): | 
|  | 55         try: | 
|  | 56             fields = line.strip().split("\t") | 
|  | 57             valid_line = True | 
|  | 58             for k,col in enumerate(x_cols): | 
|  | 59                 try: | 
|  | 60                     xval = float(fields[col]) | 
|  | 61                 except: | 
|  | 62                     skipped += 1 | 
|  | 63                     valid_line = False | 
|  | 64                     break | 
|  | 65             if valid_line: | 
|  | 66                 for k,col in enumerate(x_cols): | 
|  | 67                     xval = float(fields[col]) | 
|  | 68                     #x_vals[k].append(xval) | 
|  | 69                     x_vals.append(xval) | 
|  | 70         except: | 
|  | 71             skipped += 1 | 
|  | 72 | 
|  | 73 #x_vals1 = numpy.asarray(x_vals).transpose() | 
|  | 74 #dat= r.list(array(x_vals1)) | 
|  | 75 dat = r['matrix'](robjects.FloatVector(x_vals),ncol=len(x_cols),byrow=True) | 
|  | 76 | 
|  | 77 #set_default_mode(NO_CONVERSION) | 
|  | 78 try: | 
|  | 79     if method == "cor": | 
|  | 80         #pc = r.princomp(r.na_exclude(dat), cor = r("TRUE")) | 
|  | 81         pc = r.princomp(r['na.exclude'](dat), cor = r("TRUE")) | 
|  | 82     elif method == "cov": | 
|  | 83         #pc = r.princomp(r.na_exclude(dat), cor = r("FALSE")) | 
|  | 84         pc = r.princomp(r['na.exclude'](dat), cor = r("FALSE")) | 
|  | 85     elif method=="svd": | 
|  | 86         #pc = r.prcomp(r.na_exclude(dat), center = r(center), scale = r(scale)) | 
|  | 87         pc = r.prcomp(r['na.exclude'](dat), center = r(center), scale = r(scale)) | 
|  | 88 #except RException, rex: | 
|  | 89 except Exception, rex:  # need to find rpy2 RException | 
|  | 90     stop_err("Encountered error while performing PCA on the input data: %s" %(rex)) | 
|  | 91 | 
|  | 92 #set_default_mode(BASIC_CONVERSION) | 
|  | 93 summary = r.summary(pc, loadings="TRUE") | 
|  | 94 #ncomps = len(summary['sdev']) | 
|  | 95 ncomps = len(summary.rx2('sdev')) | 
|  | 96 | 
|  | 97 #if type(summary['sdev']) == type({}): | 
|  | 98 #    comps_unsorted = summary['sdev'].keys() | 
|  | 99 #    comps=[] | 
|  | 100 #    sd = summary['sdev'].values() | 
|  | 101 #    for i in range(ncomps): | 
|  | 102 #        sd[i] = summary['sdev'].values()[comps_unsorted.index('Comp.%s' %(i+1))] | 
|  | 103 #        comps.append('Comp.%s' %(i+1)) | 
|  | 104 #elif type(summary['sdev']) == type([]): | 
|  | 105 #    comps=[] | 
|  | 106 #    for i in range(ncomps): | 
|  | 107 #        comps.append('Comp.%s' %(i+1)) | 
|  | 108 #        sd = summary['sdev'] | 
|  | 109 | 
|  | 110 comps=[] | 
|  | 111 for i in range(ncomps): | 
|  | 112      comps.append('Comp.%s' %(i+1)) | 
|  | 113 sd = summary.rx2('sdev') | 
|  | 114 | 
|  | 115 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 116 #print >>fout, "#Std. deviation\t%s" %("\t".join(["%.4g" % el for el in sd])) | 
|  | 117 print >>fout, "#Std. deviation\t%s" %("\t".join(["%.4g" % el for el in sd])) | 
|  | 118 total_var = 0 | 
|  | 119 vars = [] | 
|  | 120 for s in sd: | 
|  | 121     var = s*s | 
|  | 122     total_var += var | 
|  | 123     vars.append(var) | 
|  | 124 for i,var in enumerate(vars): | 
|  | 125     vars[i] = vars[i]/total_var | 
|  | 126 | 
|  | 127 print >>fout, "#Proportion of variance explained\t%s" %("\t".join(["%.4g" % el for el in vars])) | 
|  | 128 | 
|  | 129 print >>fout, "#Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 130 xcolnames = ["c%d" %(el+1) for el in x_cols] | 
|  | 131 #if 'loadings' in summary: #in case of princomp | 
|  | 132 if 'loadings' in summary.names: #in case of princomp | 
|  | 133     loadings = 'loadings' | 
|  | 134 #elif 'rotation' in summary: #in case of prcomp | 
|  | 135 elif 'rotation' in summary.names: #in case of prcomp | 
|  | 136     loadings = 'rotation' | 
|  | 137 #for i,val in enumerate(summary[loadings]): | 
|  | 138 #    print >>fout, "%s\t%s" %(xcolnames[i], "\t".join(["%.4g" % el for el in val])) | 
|  | 139 vm = summary.rx2(loadings) | 
|  | 140 for i in range(vm.nrow): | 
|  | 141     vals = [] | 
|  | 142     for j in range(vm.ncol): | 
|  | 143        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 144     print >>fout, "%s\t%s" %(xcolnames[i], "\t".join(vals)) | 
|  | 145 | 
|  | 146 print >>fout, "#Scores\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 147 #if 'scores' in summary: #in case of princomp | 
|  | 148 if 'scores' in summary.names: #in case of princomp | 
|  | 149     scores = 'scores' | 
|  | 150 #elif 'x' in summary: #in case of prcomp | 
|  | 151 elif 'x' in summary.names: #in case of prcomp | 
|  | 152     scores = 'x' | 
|  | 153 #for obs,sc in enumerate(summary[scores]): | 
|  | 154 #    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in sc])) | 
|  | 155 vm = summary.rx2(scores) | 
|  | 156 for i in range(vm.nrow): | 
|  | 157     vals = [] | 
|  | 158     for j in range(vm.ncol): | 
|  | 159        vals.append("%.4g" % vm.rx2(i+1,j+1)[0]) | 
|  | 160     print >>fout, "%s\t%s" %(i+1, "\t".join(vals)) | 
|  | 161 r.pdf( outfile2, 8, 8 ) | 
|  | 162 r.biplot(pc) | 
|  | 163 #r.dev_off() | 
|  | 164 grdevices.dev_off() | 
|  | 165 |