| 
80
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 import sys, string
 | 
| 
 | 
     4 #from rpy import *
 | 
| 
 | 
     5 import rpy2.robjects as robjects
 | 
| 
 | 
     6 import rpy2.rlike.container as rlc
 | 
| 
 | 
     7 from rpy2.robjects.packages import importr
 | 
| 
 | 
     8 r = robjects.r
 | 
| 
 | 
     9 grdevices = importr('grDevices')
 | 
| 
 | 
    10 import numpy
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 def stop_err(msg):
 | 
| 
 | 
    14     sys.stderr.write(msg)
 | 
| 
 | 
    15     sys.exit()
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 infile = sys.argv[1]
 | 
| 
 | 
    18 x_cols = sys.argv[2].split(',')
 | 
| 
 | 
    19 y_cols = sys.argv[3].split(',')
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 x_scale = x_center = "FALSE"
 | 
| 
 | 
    22 if sys.argv[4] == 'both':
 | 
| 
 | 
    23     x_scale = x_center = "TRUE"
 | 
| 
 | 
    24 elif sys.argv[4] == 'center':
 | 
| 
 | 
    25     x_center = "TRUE"
 | 
| 
 | 
    26 elif sys.argv[4] == 'scale':
 | 
| 
 | 
    27     x_scale = "TRUE"
 | 
| 
 | 
    28     
 | 
| 
 | 
    29 y_scale = y_center = "FALSE"
 | 
| 
 | 
    30 if sys.argv[5] == 'both':
 | 
| 
 | 
    31     y_scale = y_center = "TRUE"
 | 
| 
 | 
    32 elif sys.argv[5] == 'center':
 | 
| 
 | 
    33     y_center = "TRUE"
 | 
| 
 | 
    34 elif sys.argv[5] == 'scale':
 | 
| 
 | 
    35     y_scale = "TRUE"
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 std_scores = "FALSE"   
 | 
| 
 | 
    38 if sys.argv[6] == "yes":
 | 
| 
 | 
    39     std_scores = "TRUE"
 | 
| 
 | 
    40     
 | 
| 
 | 
    41 outfile = sys.argv[7]
 | 
| 
 | 
    42 outfile2 = sys.argv[8]
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 fout = open(outfile,'w')
 | 
| 
 | 
    45 elems = []
 | 
| 
 | 
    46 for i, line in enumerate( file ( infile )):
 | 
| 
 | 
    47     line = line.rstrip('\r\n')
 | 
| 
 | 
    48     if len( line )>0 and not line.startswith( '#' ):
 | 
| 
 | 
    49         elems = line.split( '\t' )
 | 
| 
 | 
    50         break 
 | 
| 
 | 
    51     if i == 30:
 | 
| 
 | 
    52         break # Hopefully we'll never get here...
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 if len( elems )<1:
 | 
| 
 | 
    55     stop_err( "The data in your input dataset is either missing or not formatted properly." )
 | 
| 
 | 
    56 
 | 
| 
 | 
    57 x_vals = []
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 for k,col in enumerate(x_cols):
 | 
| 
 | 
    60     x_cols[k] = int(col)-1
 | 
| 
 | 
    61     #x_vals.append([])
 | 
| 
 | 
    62 
 | 
| 
 | 
    63 y_vals = []
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 for k,col in enumerate(y_cols):
 | 
| 
 | 
    66     y_cols[k] = int(col)-1
 | 
| 
 | 
    67     #y_vals.append([])
 | 
| 
 | 
    68 
 | 
| 
 | 
    69 skipped = 0
 | 
| 
 | 
    70 for ind,line in enumerate( file( infile )):
 | 
| 
 | 
    71     if line and not line.startswith( '#' ):
 | 
| 
 | 
    72         try:
 | 
| 
 | 
    73             fields = line.strip().split("\t")
 | 
| 
 | 
    74             valid_line = True
 | 
| 
 | 
    75             for col in x_cols+y_cols:
 | 
| 
 | 
    76                 try:
 | 
| 
 | 
    77                     assert float(fields[col])
 | 
| 
 | 
    78                 except:
 | 
| 
 | 
    79                     skipped += 1
 | 
| 
 | 
    80                     valid_line = False
 | 
| 
 | 
    81                     break
 | 
| 
 | 
    82             if valid_line:
 | 
| 
 | 
    83                 for k,col in enumerate(x_cols):
 | 
| 
 | 
    84                     try:
 | 
| 
 | 
    85                         xval = float(fields[col])
 | 
| 
 | 
    86                     except:
 | 
| 
 | 
    87                         xval = NaN#
 | 
| 
 | 
    88                     #x_vals[k].append(xval)
 | 
| 
 | 
    89                     x_vals.append(xval)
 | 
| 
 | 
    90                 for k,col in enumerate(y_cols):
 | 
| 
 | 
    91                     try:
 | 
| 
 | 
    92                         yval = float(fields[col])
 | 
| 
 | 
    93                     except:
 | 
| 
 | 
    94                         yval = NaN#
 | 
| 
 | 
    95                     #y_vals[k].append(yval)
 | 
| 
 | 
    96                     y_vals.append(yval)
 | 
| 
 | 
    97         except:
 | 
| 
 | 
    98             skipped += 1
 | 
| 
 | 
    99 
 | 
| 
 | 
   100 #x_vals1 = numpy.asarray(x_vals).transpose()
 | 
| 
 | 
   101 #y_vals1 = numpy.asarray(y_vals).transpose()
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 #x_dat= r.list(array(x_vals1))
 | 
| 
 | 
   104 #y_dat= r.list(array(y_vals1))
 | 
| 
 | 
   105 
 | 
| 
 | 
   106 x_dat = r['matrix'](robjects.FloatVector(x_vals),ncol=len(x_cols),byrow=True)
 | 
| 
 | 
   107 y_dat = r['matrix'](robjects.FloatVector(y_vals),ncol=len(y_cols),byrow=True)
 | 
| 
 | 
   108 
 | 
| 
 | 
   109 try:
 | 
| 
 | 
   110     r.suppressWarnings(r.library("yacca"))
 | 
| 
 | 
   111 except:
 | 
| 
 | 
   112     stop_err("Missing R library yacca.")
 | 
| 
 | 
   113     
 | 
| 
 | 
   114 #set_default_mode(NO_CONVERSION)
 | 
| 
 | 
   115 try:
 | 
| 
 | 
   116     xcolnames = ["c%d" %(el+1) for el in x_cols]
 | 
| 
 | 
   117     ycolnames = ["c%d" %(el+1) for el in y_cols]
 | 
| 
 | 
   118     #cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
 | 
| 
 | 
   119     cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), **{'standardize.scores':r(std_scores)})
 | 
| 
 | 
   120     #ftest = r.F_test_cca(cc)
 | 
| 
 | 
   121     ftest = r['F.test.cca'](cc)
 | 
| 
 | 
   122 except RException, rex:
 | 
| 
 | 
   123     stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
 | 
| 
 | 
   124 
 | 
| 
 | 
   125 #set_default_mode(BASIC_CONVERSION)
 | 
| 
 | 
   126 summary = r.summary(cc)
 | 
| 
 | 
   127 
 | 
| 
 | 
   128 #ncomps = len(summary['corr'])
 | 
| 
 | 
   129 ncomps = len(summary.rx2('corr'))
 | 
| 
 | 
   130 #comps = summary['corr'].keys()
 | 
| 
 | 
   131 #comps = summary.rx2('corr').names
 | 
| 
 | 
   132 comps = (','.join(summary.rx2('corr').names)).split(',')
 | 
| 
 | 
   133 #corr = summary['corr'].values()
 | 
| 
 | 
   134 corr = summary.rx2('corr')
 | 
| 
 | 
   135 #xlab = summary['xlab']
 | 
| 
 | 
   136 xlab = summary.rx2('xlab')
 | 
| 
 | 
   137 #ylab = summary['ylab']
 | 
| 
 | 
   138 ylab = summary.rx2('ylab')
 | 
| 
 | 
   139 
 | 
| 
 | 
   140 for i in range(ncomps):
 | 
| 
 | 
   141     corr[comps.index('CV %s' %(i+1))] = summary.rx2('corr')[i]
 | 
| 
 | 
   142     #corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
 | 
| 
 | 
   143 
 | 
| 
 | 
   144 #ftest=ftest.as_py()
 | 
| 
 | 
   145 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   146 print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
 | 
| 
 | 
   147 #print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
 | 
| 
 | 
   148 print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('statistic')]))
 | 
| 
 | 
   149 #print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
 | 
| 
 | 
   150 print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('p.value')]))
 | 
| 
 | 
   151 
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   154 #for i,val in enumerate(summary['xcoef']):
 | 
| 
 | 
   155 #    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
 | 
| 
 | 
   156 vm = summary.rx2('xcoef')
 | 
| 
 | 
   157 for i in range(vm.nrow):
 | 
| 
 | 
   158     vals = []
 | 
| 
 | 
   159     for j in range(vm.ncol):
 | 
| 
 | 
   160        vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
 | 
| 
 | 
   161     print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
 | 
| 
 | 
   162 
 | 
| 
 | 
   163 print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   164 #for i,val in enumerate(summary['ycoef']):
 | 
| 
 | 
   165 #    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
 | 
| 
 | 
   166 vm = summary.rx2('ycoef')
 | 
| 
 | 
   167 for i in range(vm.nrow):
 | 
| 
 | 
   168     vals = []
 | 
| 
 | 
   169     for j in range(vm.ncol):
 | 
| 
 | 
   170        vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
 | 
| 
 | 
   171     print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
 | 
| 
 | 
   172 
 | 
| 
 | 
   173 print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   174 #for i,val in enumerate(summary['xstructcorr']):
 | 
| 
 | 
   175 #    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
 | 
| 
 | 
   176 vm = summary.rx2('xstructcorr')
 | 
| 
 | 
   177 for i in range(vm.nrow):
 | 
| 
 | 
   178     vals = []
 | 
| 
 | 
   179     for j in range(vm.ncol):
 | 
| 
 | 
   180        vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
 | 
| 
 | 
   181     print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
 | 
| 
 | 
   182 
 | 
| 
 | 
   183 print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   184 #for i,val in enumerate(summary['ystructcorr']):
 | 
| 
 | 
   185 #    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
 | 
| 
 | 
   186 vm = summary.rx2('ystructcorr')
 | 
| 
 | 
   187 for i in range(vm.nrow):
 | 
| 
 | 
   188     vals = []
 | 
| 
 | 
   189     for j in range(vm.ncol):
 | 
| 
 | 
   190        vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
 | 
| 
 | 
   191     print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
 | 
| 
 | 
   192 
 | 
| 
 | 
   193 print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   194 #for i,val in enumerate(summary['xcrosscorr']):
 | 
| 
 | 
   195 #    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
 | 
| 
 | 
   196 vm = summary.rx2('xcrosscorr')
 | 
| 
 | 
   197 for i in range(vm.nrow):
 | 
| 
 | 
   198     vals = []
 | 
| 
 | 
   199     for j in range(vm.ncol):
 | 
| 
 | 
   200        vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
 | 
| 
 | 
   201     print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
 | 
| 
 | 
   202 
 | 
| 
 | 
   203 print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
 | 
| 
 | 
   204 #for i,val in enumerate(summary['ycrosscorr']):
 | 
| 
 | 
   205 #    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
 | 
| 
 | 
   206 vm = summary.rx2('ycrosscorr')
 | 
| 
 | 
   207 for i in range(vm.nrow):
 | 
| 
 | 
   208     vals = []
 | 
| 
 | 
   209     for j in range(vm.ncol):
 | 
| 
 | 
   210        vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
 | 
| 
 | 
   211     print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
 | 
| 
 | 
   212 
 | 
| 
 | 
   213 r.pdf( outfile2, 8, 8 )
 | 
| 
 | 
   214 #r.plot(cc)
 | 
| 
 | 
   215 for i in range(ncomps):
 | 
| 
 | 
   216     r['helio.plot'](cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
 | 
| 
 | 
   217 #r.dev_off()
 | 
| 
 | 
   218 grdevices.dev_off()
 | 
| 
 | 
   219 
 |