Mercurial > repos > bgruening > bg_statistical_hypothesis_testing
comparison statistical_hypothesis_testing.py @ 0:a3d8cadaf060 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/statistics commit 7c5002672919ca1e5eacacb835a4ce66ffa19656
| author | bgruening |
|---|---|
| date | Mon, 21 Nov 2022 18:07:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a3d8cadaf060 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 | |
| 5 """ | |
| 6 import argparse | |
| 7 | |
| 8 import numpy as np | |
| 9 from scipy import stats | |
| 10 | |
| 11 | |
def columns_to_values(args, line):
    """Extract integer samples from one tabular line.

    Parameters
    ----------
    args : list of list of int
        Groups of 1-based column indices, one group per sample.
    line : str
        A single tab-separated row of the input file.

    Returns
    -------
    list of list of int
        One list of integer values per index group.
    """
    # Split once: the column values are the same for every index group
    # (the original re-split the line inside the loop).
    cols = line.split("\t")
    return [[int(cols[row - 1]) for row in group] for group in args]
| 22 | |
| 23 | |
def main():
    """CLI driver: apply one scipy.stats routine per row of a tabular file.

    For every line of --infile the statistic selected by --test_id is
    computed from the 1-based column indices given via --sample_one_cols,
    --sample_two_cols and/or --sample_cols (';'-separated groups), and the
    results are appended as extra columns before the row is written to
    --outfile.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument(
        "-o", "--outfile", required=True, help="Path to the output file."
    )
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="if true then Fisher definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="if false,lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="if false,higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="if false,limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        # BUG FIX: the default used to be the string "False", which is
        # truthy, so the store_true flag was effectively always enabled.
        default=False,
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument(
        "--b", type=int, default=0, help="The number of bins to use for the histogram"
    )
    parser.add_argument(
        "--N", type=int, default=0, help="Score that is compared to the elements in a."
    )
    parser.add_argument(
        "--ddof", type=int, default=0, help="Degrees of freedom correction"
    )
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    # Flags recording which sample specifications were supplied.
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(list(map(int, sample.split(","))))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    # Hoisted out of the per-line loop: the selected test never changes,
    # so strip it once instead of on every elif comparison of every row.
    test = test_id.strip()
    # Context managers guarantee both handles are closed even on error
    # (the original leaked the input handle and only closed the output
    # file on the happy path).
    with open(args.outfile, "w+") as outfile, open(infile) as handle:
        for line in handle:
            sample_one = []
            sample_two = []
            cols = line.strip().split("\t")
            if sample0 == 1:
                b_samples = columns_to_values(barlett_samples, line)
            if sample1 == 1:
                for index in sample_one_cols:
                    sample_one.append(cols[int(index) - 1])
            if sample2 == 1:
                for index in sample_two_cols:
                    sample_two.append(cols[int(index) - 1])
            # One-sample statistics ------------------------------------
            if test == "describe":
                size, min_max, mean, uv, bs, bk = stats.describe(
                    list(map(float, sample_one))
                )
                cols.append(size)
                cols.append(min_max)
                cols.append(mean)
                cols.append(uv)
                cols.append(bs)
                cols.append(bk)
            elif test == "mode":
                vals, counts = stats.mode(list(map(float, sample_one)))
                cols.append(vals)
                cols.append(counts)
            elif test == "nanmean":
                m = stats.nanmean(list(map(float, sample_one)))
                cols.append(m)
            elif test == "nanmedian":
                # NOTE: a second, unreachable copy of this branch (and of
                # "variation") was removed from the original elif chain.
                m = stats.nanmedian(list(map(float, sample_one)))
                cols.append(m)
            elif test == "kurtosistest":
                z_value, p_value = stats.kurtosistest(list(map(float, sample_one)))
                cols.append(z_value)
                cols.append(p_value)
            elif test == "variation":
                ra = stats.variation(list(map(float, sample_one)))
                cols.append(ra)
            elif test == "itemfreq":
                freq = np.unique(list(map(float, sample_one)), return_counts=True)
                for i in freq:
                    elements = ",".join(list(map(str, i)))
                    cols.append(elements)
            elif test == "boxcox_llf":
                IIf = stats.boxcox_llf(imbda, list(map(float, sample_one)))
                cols.append(IIf)
            elif test == "tiecorrect":
                fa = stats.tiecorrect(list(map(float, sample_one)))
                cols.append(fa)
            elif test == "rankdata":
                r = stats.rankdata(list(map(float, sample_one)), method=args.md)
                cols.append(r)
            elif test == "nanstd":
                s = stats.nanstd(list(map(float, sample_one)), bias=args.bias)
                cols.append(s)
            elif test == "anderson":
                A2, critical, sig = stats.anderson(
                    list(map(float, sample_one)), dist=args.dist
                )
                cols.append(A2)
                for i in critical:
                    cols.append(i)
                cols.append(",")
                for i in sig:
                    cols.append(i)
            elif test == "binom_test":
                p_value = stats.binom_test(
                    list(map(float, sample_one)), n=args.n, p=args.p
                )
                cols.append(p_value)
            elif test == "gmean":
                gm = stats.gmean(list(map(float, sample_one)), dtype=args.dtype)
                cols.append(gm)
            elif test == "hmean":
                hm = stats.hmean(list(map(float, sample_one)), dtype=args.dtype)
                cols.append(hm)
            elif test == "kurtosis":
                k = stats.kurtosis(
                    list(map(float, sample_one)),
                    axis=args.axis,
                    fisher=args.fisher,
                    bias=args.bias,
                )
                cols.append(k)
            elif test == "moment":
                n_moment = stats.moment(list(map(float, sample_one)), n=args.n)
                cols.append(n_moment)
            elif test == "normaltest":
                k2, p_value = stats.normaltest(list(map(float, sample_one)))
                cols.append(k2)
                cols.append(p_value)
            elif test == "skew":
                skewness = stats.skew(list(map(float, sample_one)), bias=args.bias)
                cols.append(skewness)
            elif test == "skewtest":
                z_value, p_value = stats.skewtest(list(map(float, sample_one)))
                cols.append(z_value)
                cols.append(p_value)
            elif test == "sem":
                s = stats.sem(list(map(float, sample_one)), ddof=args.ddof)
                cols.append(s)
            elif test == "zscore":
                z = stats.zscore(list(map(float, sample_one)), ddof=args.ddof)
                for i in z:
                    cols.append(i)
            elif test == "signaltonoise":
                s2n = stats.signaltonoise(list(map(float, sample_one)), ddof=args.ddof)
                cols.append(s2n)
            elif test == "percentileofscore":
                p = stats.percentileofscore(
                    list(map(float, sample_one)), score=args.score, kind=args.kind
                )
                cols.append(p)
            elif test == "bayes_mvs":
                c_mean, c_var, c_std = stats.bayes_mvs(
                    list(map(float, sample_one)), alpha=args.alpha
                )
                cols.append(c_mean)
                cols.append(c_var)
                cols.append(c_std)
            elif test == "sigmaclip":
                # NOTE(review): high=args.n is an *int* flag documented as
                # "number of trials"; args.nf looks more plausible, but the
                # original behaviour is kept — confirm against the wrapper.
                c, c_low, c_up = stats.sigmaclip(
                    list(map(float, sample_one)), low=args.m, high=args.n
                )
                cols.append(c)
                cols.append(c_low)
                cols.append(c_up)
            elif test == "kstest":
                d, p_value = stats.kstest(
                    list(map(float, sample_one)),
                    cdf=args.cdf,
                    N=args.N,
                    alternative=args.alternative,
                    mode=args.mode,
                )
                cols.append(d)
                cols.append(p_value)
            elif test == "chi2_contingency":
                chi2, p, dof, ex = stats.chi2_contingency(
                    list(map(float, sample_one)),
                    correction=args.correction,
                    lambda_=args.lambda_,
                )
                cols.append(chi2)
                cols.append(p)
                cols.append(dof)
                cols.append(ex)
            # Trimmed statistics: (mf, nf) act as (lower, upper) limits;
            # both at 0 means "no limits supplied".
            elif test == "tmean":
                if nf == 0 and mf == 0:
                    mean = stats.tmean(list(map(float, sample_one)))
                else:
                    mean = stats.tmean(
                        list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                    )
                cols.append(mean)
            elif test == "tmin":
                # Renamed from "min"/"max" to avoid shadowing the builtins.
                if mf == 0:
                    tmin_val = stats.tmin(list(map(float, sample_one)))
                else:
                    tmin_val = stats.tmin(
                        list(map(float, sample_one)),
                        lowerlimit=mf,
                        inclusive=args.inclusive,
                    )
                cols.append(tmin_val)
            elif test == "tmax":
                if nf == 0:
                    tmax_val = stats.tmax(list(map(float, sample_one)))
                else:
                    tmax_val = stats.tmax(
                        list(map(float, sample_one)),
                        upperlimit=nf,
                        inclusive=args.inclusive,
                    )
                cols.append(tmax_val)
            elif test == "tvar":
                if nf == 0 and mf == 0:
                    var = stats.tvar(list(map(float, sample_one)))
                else:
                    var = stats.tvar(
                        list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                    )
                cols.append(var)
            elif test == "tstd":
                if nf == 0 and mf == 0:
                    std = stats.tstd(list(map(float, sample_one)))
                else:
                    std = stats.tstd(
                        list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                    )
                cols.append(std)
            elif test == "tsem":
                if nf == 0 and mf == 0:
                    s = stats.tsem(list(map(float, sample_one)))
                else:
                    s = stats.tsem(
                        list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                    )
                cols.append(s)
            elif test == "scoreatpercentile":
                if nf == 0 and mf == 0:
                    s = stats.scoreatpercentile(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        interpolation_method=args.interpolation,
                    )
                else:
                    s = stats.scoreatpercentile(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        (mf, nf),
                        interpolation_method=args.interpolation,
                    )
                for i in s:
                    cols.append(i)
            elif test == "relfreq":
                if nf == 0 and mf == 0:
                    rel, low_range, binsize, ex = stats.relfreq(
                        list(map(float, sample_one)), args.b
                    )
                else:
                    rel, low_range, binsize, ex = stats.relfreq(
                        list(map(float, sample_one)), args.b, (mf, nf)
                    )
                for i in rel:
                    cols.append(i)
                cols.append(low_range)
                cols.append(binsize)
                cols.append(ex)
            elif test == "binned_statistic":
                if nf == 0 and mf == 0:
                    st, b_edge, b_n = stats.binned_statistic(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        statistic=args.statistic,
                        bins=args.b,
                    )
                else:
                    st, b_edge, b_n = stats.binned_statistic(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        statistic=args.statistic,
                        bins=args.b,
                        range=(mf, nf),
                    )
                cols.append(st)
                cols.append(b_edge)
                cols.append(b_n)
            elif test == "threshold":
                if nf == 0 and mf == 0:
                    o = stats.threshold(list(map(float, sample_one)), newval=args.new)
                else:
                    o = stats.threshold(
                        list(map(float, sample_one)), mf, nf, newval=args.new
                    )
                for i in o:
                    cols.append(i)
            elif test == "trimboth":
                o = stats.trimboth(
                    list(map(float, sample_one)), proportiontocut=args.proportiontocut
                )
                for i in o:
                    cols.append(i)
            elif test == "trim1":
                t1 = stats.trim1(
                    list(map(float, sample_one)),
                    proportiontocut=args.proportiontocut,
                    tail=args.tail,
                )
                for i in t1:
                    cols.append(i)
            elif test == "histogram":
                if nf == 0 and mf == 0:
                    hi, low_range, binsize, ex = stats.histogram(
                        list(map(float, sample_one)), args.b
                    )
                else:
                    hi, low_range, binsize, ex = stats.histogram(
                        list(map(float, sample_one)), args.b, (mf, nf)
                    )
                cols.append(hi)
                cols.append(low_range)
                cols.append(binsize)
                cols.append(ex)
            elif test == "cumfreq":
                if nf == 0 and mf == 0:
                    cum, low_range, binsize, ex = stats.cumfreq(
                        list(map(float, sample_one)), args.b
                    )
                else:
                    cum, low_range, binsize, ex = stats.cumfreq(
                        list(map(float, sample_one)), args.b, (mf, nf)
                    )
                cols.append(cum)
                cols.append(low_range)
                cols.append(binsize)
                cols.append(ex)
            elif test == "boxcox_normmax":
                if nf == 0 and mf == 0:
                    ma = stats.boxcox_normmax(list(map(float, sample_one)))
                else:
                    ma = stats.boxcox_normmax(
                        list(map(float, sample_one)), (mf, nf), method=args.method
                    )
                cols.append(ma)
            elif test == "boxcox":
                # imbda == 0 means "not supplied": let boxcox estimate lambda
                # (it then returns the CI as a third value).
                if imbda == 0:
                    box, ma, ci = stats.boxcox(
                        list(map(float, sample_one)), alpha=args.alpha
                    )
                    cols.append(box)
                    cols.append(ma)
                    cols.append(ci)
                else:
                    box = stats.boxcox(
                        list(map(float, sample_one)), imbda, alpha=args.alpha
                    )
                    cols.append(box)
            # Two-sample statistics ------------------------------------
            elif test == "histogram2":
                h2 = stats.histogram2(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                for i in h2:
                    cols.append(i)
            elif test == "ranksums":
                z_statistic, p_value = stats.ranksums(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                cols.append(z_statistic)
                cols.append(p_value)
            elif test == "ttest_1samp":
                # BUG FIX: the original passed raw map iterators (broken
                # under Python 3) and iterated the results, which are
                # scalars for 1-D input; atleast_1d keeps the "append every
                # element" output shape for both scalar and array results.
                t, prob = stats.ttest_1samp(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                for i in np.atleast_1d(t):
                    cols.append(i)
                for i in np.atleast_1d(prob):
                    cols.append(i)
            elif test == "ansari":
                AB, p_value = stats.ansari(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                cols.append(AB)
                cols.append(p_value)
            elif test == "linregress":
                slope, intercept, r_value, p_value, stderr = stats.linregress(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                cols.append(slope)
                cols.append(intercept)
                cols.append(r_value)
                cols.append(p_value)
                cols.append(stderr)
            elif test == "pearsonr":
                cor, p_value = stats.pearsonr(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                cols.append(cor)
                cols.append(p_value)
            elif test == "pointbiserialr":
                r, p_value = stats.pointbiserialr(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                cols.append(r)
                cols.append(p_value)
            elif test == "ks_2samp":
                d, p_value = stats.ks_2samp(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
                cols.append(d)
                cols.append(p_value)
            elif test == "mannwhitneyu":
                mw_stats_u, p_value = stats.mannwhitneyu(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    use_continuity=args.mwu_use_continuity,
                )
                cols.append(mw_stats_u)
                cols.append(p_value)
            elif test == "zmap":
                z = stats.zmap(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    ddof=args.ddof,
                )
                for i in z:
                    cols.append(i)
            elif test == "ttest_ind":
                mw_stats_u, p_value = stats.ttest_ind(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    equal_var=args.equal_var,
                )
                cols.append(mw_stats_u)
                cols.append(p_value)
            elif test == "ttest_rel":
                t, prob = stats.ttest_rel(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    axis=args.axis,
                )
                cols.append(t)
                cols.append(prob)
            elif test == "mood":
                z, p_value = stats.mood(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    axis=args.axis,
                )
                cols.append(z)
                cols.append(p_value)
            elif test == "shapiro":
                W, p_value = stats.shapiro(list(map(float, sample_one)))
                cols.append(W)
                cols.append(p_value)
            elif test == "kendalltau":
                k, p_value = stats.kendalltau(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    initial_lexsort=args.initial_lexsort,
                )
                cols.append(k)
                cols.append(p_value)
            elif test == "entropy":
                s = stats.entropy(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    base=args.base,
                )
                cols.append(s)
            # One- or two-sample statistics ----------------------------
            elif test == "spearmanr":
                if sample2 == 1:
                    rho, p_value = stats.spearmanr(
                        list(map(float, sample_one)), list(map(float, sample_two))
                    )
                else:
                    rho, p_value = stats.spearmanr(list(map(float, sample_one)))
                cols.append(rho)
                cols.append(p_value)
            elif test == "wilcoxon":
                if sample2 == 1:
                    T, p_value = stats.wilcoxon(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        zero_method=args.zero_method,
                        correction=args.correction,
                    )
                else:
                    T, p_value = stats.wilcoxon(
                        list(map(float, sample_one)),
                        zero_method=args.zero_method,
                        correction=args.correction,
                    )
                cols.append(T)
                cols.append(p_value)
            elif test == "chisquare":
                if sample2 == 1:
                    rho, p_value = stats.chisquare(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        ddof=args.ddof,
                    )
                else:
                    rho, p_value = stats.chisquare(
                        list(map(float, sample_one)), ddof=args.ddof
                    )
                cols.append(rho)
                cols.append(p_value)
            elif test == "power_divergence":
                if sample2 == 1:
                    stat, p_value = stats.power_divergence(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        ddof=args.ddof,
                        lambda_=args.lambda_,
                    )
                else:
                    stat, p_value = stats.power_divergence(
                        list(map(float, sample_one)), ddof=args.ddof, lambda_=args.lambda_
                    )
                cols.append(stat)
                cols.append(p_value)
            elif test == "theilslopes":
                if sample2 == 1:
                    mpe, met, lo, up = stats.theilslopes(
                        list(map(float, sample_one)),
                        list(map(float, sample_two)),
                        alpha=args.alpha,
                    )
                else:
                    mpe, met, lo, up = stats.theilslopes(
                        list(map(float, sample_one)), alpha=args.alpha
                    )
                cols.append(mpe)
                cols.append(met)
                cols.append(lo)
                cols.append(up)
            elif test == "combine_pvalues":
                if sample2 == 1:
                    stat, p_value = stats.combine_pvalues(
                        list(map(float, sample_one)),
                        method=args.med,
                        weights=list(map(float, sample_two)),
                    )
                else:
                    stat, p_value = stats.combine_pvalues(
                        list(map(float, sample_one)), method=args.med
                    )
                cols.append(stat)
                cols.append(p_value)
            # k-sample statistics (use --sample_cols groups) -----------
            elif test == "obrientransform":
                ob = stats.obrientransform(*b_samples)
                for i in ob:
                    elements = ",".join(list(map(str, i)))
                    cols.append(elements)
            elif test == "f_oneway":
                f_value, p_value = stats.f_oneway(*b_samples)
                cols.append(f_value)
                cols.append(p_value)
            elif test == "kruskal":
                h, p_value = stats.kruskal(*b_samples)
                cols.append(h)
                cols.append(p_value)
            elif test == "friedmanchisquare":
                fr, p_value = stats.friedmanchisquare(*b_samples)
                cols.append(fr)
                cols.append(p_value)
            elif test == "fligner":
                xsq, p_value = stats.fligner(
                    center=args.center, proportiontocut=args.proportiontocut, *b_samples
                )
                cols.append(xsq)
                cols.append(p_value)
            elif test == "bartlett":
                T, p_value = stats.bartlett(*b_samples)
                cols.append(T)
                cols.append(p_value)
            elif test == "levene":
                w, p_value = stats.levene(
                    center=args.center, proportiontocut=args.proportiontocut, *b_samples
                )
                cols.append(w)
                cols.append(p_value)
            elif test == "median_test":
                stat, p_value, m, table = stats.median_test(
                    ties=args.ties,
                    correction=args.correction,
                    lambda_=args.lambda_,
                    *b_samples
                )
                cols.append(stat)
                cols.append(p_value)
                cols.append(m)
                cols.append(table)
                for i in table:
                    elements = ",".join(list(map(str, i)))
                    cols.append(elements)
            outfile.write("%s\n" % "\t".join(list(map(str, cols))))
| 771 | |
| 772 | |
# Standard script guard: run the CLI only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
