| 80 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 """ | 
|  | 4 Run kernel PCA using kpca() from R 'kernlab' package | 
|  | 5 | 
|  | 6 usage: %prog [options] | 
|  | 7    -i, --input=i: Input file | 
|  | 8    -o, --output1=o: Summary output | 
|  | 9    -p, --output2=p: Figures output | 
|  | 10    -c, --var_cols=c: Variable columns | 
|  | 11    -k, --kernel=k: Kernel function | 
|  | 12    -f, --features=f: Number of principal components to return | 
|  | 13    -s, --sigma=s: sigma | 
|  | 14    -d, --degree=d: degree | 
|  | 15    -l, --scale=l: scale | 
|  | 16    -t, --offset=t: offset | 
|  | 17    -r, --order=r: order | 
|  | 18 | 
|  | 19 usage: %prog input output1 output2 var_cols kernel features sigma(or_None) degree(or_None) scale(or_None) offset(or_None) order(or_None) | 
|  | 20 """ | 
|  | 21 | 
|  | 22 from galaxy import eggs | 
|  | 23 import sys, string | 
|  | 24 #from rpy import * | 
|  | 25 import rpy2.robjects as robjects | 
|  | 26 import rpy2.rlike.container as rlc | 
|  | 27 from rpy2.robjects.packages import importr | 
|  | 28 r = robjects.r | 
|  | 29 grdevices = importr('grDevices') | 
|  | 30 import numpy | 
|  | 31 import pkg_resources; pkg_resources.require( "bx-python" ) | 
|  | 32 from bx.cookbook import doc_optparse | 
|  | 33 | 
|  | 34 | 
|  | 35 def stop_err(msg): | 
|  | 36     sys.stderr.write(msg) | 
|  | 37     sys.exit() | 
|  | 38 | 
|  | 39 #Parse Command Line | 
|  | 40 options, args = doc_optparse.parse( __doc__ ) | 
|  | 41 #{'options= kernel': 'rbfdot', 'var_cols': '1,2,3,4', 'degree': 'None', 'output2': '/afs/bx.psu.edu/home/gua110/workspace/galaxy_bitbucket/database/files/000/dataset_260.dat', 'output1': '/afs/bx.psu.edu/home/gua110/workspace/galaxy_bitbucket/database/files/000/dataset_259.dat', 'scale': 'None', 'offset': 'None', 'input': '/afs/bx.psu.edu/home/gua110/workspace/galaxy_bitbucket/database/files/000/dataset_256.dat', 'sigma': '1.0', 'order': 'None'} | 
|  | 42 | 
|  | 43 infile = options.input | 
|  | 44 x_cols = options.var_cols.split(',') | 
|  | 45 kernel = options.kernel | 
|  | 46 outfile = options.output1 | 
|  | 47 outfile2 = options.output2 | 
|  | 48 ncomps = int(options.features) | 
|  | 49 fout = open(outfile,'w') | 
|  | 50 | 
|  | 51 elems = [] | 
|  | 52 for i, line in enumerate( file ( infile )): | 
|  | 53     line = line.rstrip('\r\n') | 
|  | 54     if len( line )>0 and not line.startswith( '#' ): | 
|  | 55         elems = line.split( '\t' ) | 
|  | 56         break | 
|  | 57     if i == 30: | 
|  | 58         break # Hopefully we'll never get here... | 
|  | 59 | 
|  | 60 if len( elems )<1: | 
|  | 61     stop_err( "The data in your input dataset is either missing or not formatted properly." ) | 
|  | 62 | 
|  | 63 x_vals = [] | 
|  | 64 | 
|  | 65 for k,col in enumerate(x_cols): | 
|  | 66     x_cols[k] = int(col)-1 | 
|  | 67     #x_vals.append([]) | 
|  | 68 | 
|  | 69 NA = 'NA' | 
|  | 70 skipped = 0 | 
|  | 71 for ind,line in enumerate( file( infile )): | 
|  | 72     if line and not line.startswith( '#' ): | 
|  | 73         try: | 
|  | 74             fields = line.strip().split("\t") | 
|  | 75             for k,col in enumerate(x_cols): | 
|  | 76                 try: | 
|  | 77                     xval = float(fields[col]) | 
|  | 78                 except: | 
|  | 79                     #xval = r('NA') | 
|  | 80                     xval = NaN# | 
|  | 81                 #x_vals[k].append(xval) | 
|  | 82                 x_vals.append(xval) | 
|  | 83         except: | 
|  | 84             skipped += 1 | 
|  | 85 | 
|  | 86 #x_vals1 = numpy.asarray(x_vals).transpose() | 
|  | 87 #dat= r.list(array(x_vals1)) | 
|  | 88 dat = r['matrix'](robjects.FloatVector(x_vals),ncol=len(x_cols),byrow=True) | 
|  | 89 | 
|  | 90 | 
|  | 91 try: | 
|  | 92     r.suppressWarnings(r.library('kernlab')) | 
|  | 93 except: | 
|  | 94     stop_err('Missing R library kernlab') | 
|  | 95 | 
|  | 96 #set_default_mode(NO_CONVERSION) | 
|  | 97 if kernel=="rbfdot" or kernel=="anovadot": | 
|  | 98     pars = r.list(sigma=float(options.sigma)) | 
|  | 99 elif kernel=="polydot": | 
|  | 100     pars = r.list(degree=float(options.degree),scale=float(options.scale),offset=float(options.offset)) | 
|  | 101 elif kernel=="tanhdot": | 
|  | 102     pars = r.list(scale=float(options.scale),offset=float(options.offset)) | 
|  | 103 elif kernel=="besseldot": | 
|  | 104     pars = r.list(degree=float(options.degree),sigma=float(options.sigma),order=float(options.order)) | 
|  | 105 elif kernel=="anovadot": | 
|  | 106     pars = r.list(degree=float(options.degree),sigma=float(options.sigma)) | 
|  | 107 else: | 
|  | 108     pars = r.list() | 
|  | 109 | 
|  | 110 try: | 
|  | 111     #kpc = r.kpca(x=r.na_exclude(dat), kernel=kernel, kpar=pars, features=ncomps) | 
|  | 112     kpc = r.kpca(x=r['na.exclude'](dat), kernel=kernel, kpar=pars, features=ncomps) | 
|  | 113 #except RException, rex: | 
|  | 114 except Exception, rex:  # need to find rpy2 RException | 
|  | 115     stop_err("Encountered error while performing kPCA on the input data: %s" %(rex)) | 
|  | 116 #set_default_mode(BASIC_CONVERSION) | 
|  | 117 | 
|  | 118 eig = r.eig(kpc) | 
|  | 119 pcv = r.pcv(kpc) | 
|  | 120 rotated = r.rotated(kpc) | 
|  | 121 | 
|  | 122 #comps = eig.keys() | 
|  | 123 comps = eig.names | 
|  | 124 #eigv = eig.values() | 
|  | 125 #for i in range(ncomps): | 
|  | 126 #    eigv[comps.index('Comp.%s' %(i+1))] = eig.values()[i] | 
|  | 127 | 
|  | 128 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 129 | 
|  | 130 #print >>fout, "#Eigenvalue\t%s" %("\t".join(["%.4g" % el for el in eig.values()])) | 
|  | 131 print >>fout, "#Eigenvalue\t%s" %("\t".join(["%.4g" % el for el in eig])) | 
|  | 132 print >>fout, "#Principal component vectors\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 133 #for obs,val in enumerate(pcv): | 
|  | 134 #    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in val])) | 
|  | 135 for i in range(1,pcv.nrow+1): | 
|  | 136     vals = [] | 
|  | 137     for j in range(1,pcv.ncol+1): | 
|  | 138        vals.append("%.4g" % pcv.rx2(i,j)[0]) | 
|  | 139     print >>fout, "%s\t%s" %(i, "\t".join(vals)) | 
|  | 140 | 
|  | 141 | 
|  | 142 print >>fout, "#Rotated values\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) | 
|  | 143 #for obs,val in enumerate(rotated): | 
|  | 144 #    print >>fout, "%s\t%s" %(obs+1, "\t".join(["%.4g" % el for el in val])) | 
|  | 145 for i in range(1,rotated.nrow+1): | 
|  | 146     vals = [] | 
|  | 147     for j in range(1,rotated.ncol+1): | 
|  | 148        vals.append("%.4g" % rotated.rx2(i,j)[0]) | 
|  | 149     print >>fout, "%s\t%s" %(i, "\t".join(vals)) | 
|  | 150 | 
|  | 151 r.pdf( outfile2, 8, 8 ) | 
|  | 152 if ncomps != 1: | 
|  | 153     #r.pairs(rotated,labels=r.list(range(1,ncomps+1)),main="Scatterplot of rotated values") | 
|  | 154     r.pairs(rotated,labels=robjects.StrVector(range(1,ncomps+1)),main="Scatterplot of rotated values") | 
|  | 155 else: | 
|  | 156     r.plot(rotated, ylab='Comp.1', main="Scatterplot of rotated values") | 
|  | 157 #r.dev_off() | 
|  | 158 grdevices.dev_off() | 
|  | 159 |