comparison statistical_hypothesis_testing.xml @ 0:22ed769665b6 draft default tip

Uploaded
author bgruening
date Sun, 01 Feb 2015 18:35:40 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:22ed769665b6
1 <tool id="bg_statistical_hypothesis_testing" name="Statistical hypothesis testing" version="0.2">
2 <description></description>
3 <requirements>
<!-- NOTE(review): @EXECUTABLE@ is a macro token that is never defined in this
     file's <macros> section, and type="binary" is a deprecated requirement
     type - presumably leftover scaffolding; confirm and remove if unused. -->
4 <requirement type="binary">@EXECUTABLE@</requirement>
<!-- Python packages required by statistical_hypothesis_testing.py -->
5 <requirement type="package" version="1.9">numpy</requirement>
6 <requirement type="package" version="0.14">scipy</requirement>
7 </requirements>
8 <macros>
9 <macro name="macro_sample_one_cols">
10 <param name="sample_one_cols" multiple="True" type="data_column" data_ref="infile" label="Column for sample one"/>
11 </macro>
12 <macro name="macro_sample_two_cols">
13 <param name="sample_two_cols" multiple="True" type="data_column" data_ref="infile" optional="True" label="Column for sample two"/>
14 </macro>
15 <macro name="macro_sample_cols_min2">
16 <repeat name="samples" title="more samples" min='2'>
17 <param name="sample_cols" multiple="True" type="data_column" data_ref="infile" label="Column for sample"/>
18 </repeat>
19 </macro>
20 <macro name="macro_sample_cols_min3">
21 <repeat name="samples" title="more samples" min='3'>
22 <param name="sample_cols" multiple="True" type="data_column" data_ref="infile" label="Column for sample"/>
23 </repeat>
24 </macro>
25 <macro name="macro_zero_method">
<!-- Maps to scipy.stats.wilcoxon's zero_method argument; the previous label
     was a placeholder ("pratt,wilcox,zsplit"). -->
26 <param name="zero_method" type="select" label="Treatment of zero-differences (pratt, wilcox, zsplit)">
27 <option value="pratt">Pratt treatment: includes zero-differences in the ranking process</option>
28 <option value="wilcox">Wilcox treatment: discards all zero-differences</option>
29 <option value="zsplit">Zero rank split: just like Pratt, but splitting the zero rank between positive and negative ones</option>
30 </param>
31 </macro>
32 <macro name="macro_center">
33 <param name="center" type="select" label="Which function of the data to use in the test ">
34 <option value="median">median</option>
35 <option value="mean">mean</option>
36 <option value="trimmed">trimmed</option>
37 </param>
38 </macro>
39 <macro name="macro_interpolation">
40 <param name="interpolation" type="select" label="this specifies the interpolation method to use, when the desired quantile lies between two data points i and j">
41 <option value="fraction">fraction</option>
42 <option value="lower">lower</option>
43 <option value="higher">higher</option>
44 </param>
45 </macro>
46 <macro name="macro_ties">
47 <param name="ties" type="select" label="Determines how values equal to the grand median are classified in the contingency table">
48 <option value="below">below</option>
49 <option value="above">above</option>
50 <option value="ignore">ignore</option>
51 </param>
52 </macro>
53 <macro name="macro_method">
54 <param name="method" type="select" label="Maximizes the Pearson correlation coefficient">
55 <option value="pearsonr">pearsonr</option>
56 <option value="mle">mle</option>
57 <option value="all">all</option>
58 </param>
59 </macro>
60 <macro name="macro_dist">
61 <param name="dist" type="select" label="the type of distribution to test against. The default is ‘norm’ and ‘extreme1’ is a synonym for ‘gumbel’">
62 <option value="norm">norm</option>
63 <option value="expon">expon</option>
64 <option value="logistic">logistic</option>
65 <option value="gumbel">gumbel</option>
66 <option value="extreme1">extreme1</option>
67 </param>
68 </macro>
69 <macro name="macro_tail">
70 <param name="tail" type="select" label="From which tail">
71 <option value="right">right</option>
72 <option value="left">left</option>
73 </param>
74 </macro>
75 <macro name="macro_kind">
76 <param name="kind" type="select" label="This optional parameter specifies the interpretation of the resulting score">
77 <option value="rank">rank</option>
78 <option value="weak">weak</option>
79 <option value="strict">strict</option>
80 <option value="mean">mean</option>
81 </param>
82 </macro>
83 <macro name="macro_md">
84 <param name="md" type="select" label="The method used to assign ranks to tied elements">
85 <option value="average">average</option>
86 <option value="min">min</option>
87 <option value="max">max</option>
88 <option value="dense">dense</option>
89 <option value="ordinal">ordinal</option>
90 </param>
91 </macro>
92 <macro name="macro_statistic">
93 <param name="statistic" type="select" label="The statistic to compute ">
94 <option value="mean">mean</option>
95 <option value="median">median</option>
96 <option value="count">count</option>
97 <option value="sum">sum</option>
98 </param>
99 </macro>
100 <macro name="macro_alternative">
101 <param name="alternative" type="select" label="Defines the alternative hypothesis">
102 <option value="two-sided">two-sided</option>
103 <option value="less">less</option>
104 <option value="greater">greater</option>
105 </param>
106 </macro>
107 <macro name="macro_mode">
108 <param name="mode" type="select" label="Defines the distribution used for calculating the p-value">
109 <option value="approx">approx</option>
110 <option value="asymp">asymp</option>
111 </param>
112 </macro>
<!-- duplicate definition of macro_interpolation removed; an identical macro is defined earlier in this <macros> section -->
120 <macro name="macro_correction">
121 <param name="correction" type="boolean" truevalue="--correction" falsevalue="" checked="True" label="If True, and the degrees of freedom is 1, apply Yates’ correction for continuity."/>
122 </macro>
123 <macro name="macro_printextras">
124 <param name="printextras" type="boolean" truevalue="--printextras" falsevalue="" checked="False" label="printextras" help="If True, if there are extra points a warning is raised saying how many of those points there are" />
125 </macro>
126 <macro name="macro_initial_lexsort">
127 <param name="initial_lexsort" type="boolean" truevalue="--initial_lexsort" falsevalue="" checked="True" label="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs"/>
128 </macro>
129 <macro name="macro_cdf">
130 <param name="cdf" size="16" type="text" value="norm" label="If a string, it should be the name of a distribution in scipy.stats"/>
131 </macro>
132 <macro name="macro_ni">
133 <param name="ni" size="5" type="integer" value="20" label="N" optional="True" help="Sample size if rvs is string or callable."/>
134 </macro>
135 <macro name="macro_mwu_use_continuity">
136 <param name="mwu_use_continuity" type="boolean" label="Enable continuity correction" help="Whether a continuity correction (1/2.) should be taken into account." truevalue="--mwu_use_continuity" falsevalue="" checked="true" />
137 </macro>
138 <macro name="macro_equal_var">
139 <param name="equal_var" type="boolean" label="assume equal population" help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch’s t-test, which does not assume equal population variance." truevalue="--equal_var" falsevalue="" checked="true" />
140 </macro>
141 <macro name="macro_base">
142 <param name="base" size="5" type="float" value="1.6" label="base" help="The logarithmic base to use, defaults to e"/>
143 </macro>
144 <macro name="macro_med">
145 <param name="med" size="16" type="text" value="fisher" label="Name of method to use to combine p-values"/>
146 </macro>
147 <macro name="macro_reta">
148 <param name="reta" type="boolean" truevalue="--reta" falsevalue="" checked="False" label="Whether or not to return the internally computed a values." help="Whether or not to return the internally computed a values"/>
149 </macro>
150 <macro name="macro_n_in">
151 <param name="n" size="5" type="integer" value="1" label="the number of trials" help="This is ignored if x gives both the number of successes and failures"/>
152 </macro>
153 <macro name="macro_n_moment">
154 <param name="n" size="5" type="integer" value="1" label="moment" help="order of central moment that is returned"/>
155 </macro>
<!-- duplicate definition of macro_equal_var removed; an identical macro is defined earlier in this <macros> section -->
159 <macro name="macro_imbda">
160 <param name="imbda" size="5" type="float" value="" label="imbda" optional="True" help="do the transformation for that value"/>
161 </macro>
162 <macro name="macro_ddof">
163 <param name="ddof" size="5" type="integer" value="0" label="ddof" optional="True" help="Degrees of freedom correction for standard deviation. "/>
164 </macro>
165 <macro name="macro_dtype">
166 <param name="dtype" size="16" type="text" value="" optional="True" label="Type of the returned array and of the accumulator in which the elements are summed"/>
167 </macro>
168 <macro name="macro_m">
169 <param name="m" size="5" type="float" value="4" label="low" help="Lower bound factor of sigma clipping. Default is 4."/>
170 </macro>
171 <macro name="macro_mf">
172 <param name="mf" size="5" type="float" value="" label="lower_limit" optional="True" help="lower values for the range of the histogram"/>
173 </macro>
174 <macro name="macro_nf">
175 <param name="nf" size="5" type="float" value="" label="upper_limit" optional="True" help="higher values for the range of the histogram"/>
176 </macro>
177 <macro name="macro_b">
178 <param name="b" size="5" type="integer" value="10" label="numbins" help="The number of bins to use for the histogram"/>
179 </macro>
180 <macro name="macro_proportiontocut">
181 <param name="proportiontocut" size="5" type="float" value="0.05" label="proportiontocut" optional="True" help="Proportion (in range 0-1) of total data set to trim of each end"/>
182 </macro>
183 <macro name="macro_alpha">
184 <param name="alpha" size="5" type="float" value="0.9" label="alpha" optional="True" help="Probability that the returned confidence interval contains the true parameter"/>
185 </macro>
186 <macro name="macro_score">
187 <param name="score" size="5" type="integer" value="0" label="score" optional="True" help="Score that is compared to the elements in a"/>
188 </macro>
189 <macro name="macro_axis">
190 <param name="axis" size="5" type="integer" value="0" label="0 means one-dimensional array" help="Axis along which the kurtosis is calculated"/>
191 </macro>
192 <macro name="macro_new">
193 <param name="new" size="5" type="float" value="0" label="newval" help="Value to put in place of values in a outside of bounds"/>
194 </macro>
195 <macro name="macro_fisher">
196 <param name="fisher" type="boolean" truevalue="--fisher" falsevalue="" checked="true" label="Fisher’s definition is used" help="If True, Fisher’s definition is used (normal ==> 0.0). If False, Pearson’s definition is used (normal ==> 3.0)." />
197 </macro>
<!-- duplicate definitions of macro_b and macro_proportiontocut removed; identical macros are defined earlier in this <macros> section -->
204 <macro name="macro_bias">
205 <param name="bias" type="boolean" truevalue="--bias" falsevalue="" checked="true" label="bias" help="If False, then the calculations are corrected for statistical bias." />
206 </macro>
207 <macro name="macro_lambda_">
208 <param name="lambda_" size="5" type="float" value="1" label="lambda_" optional="True" help="lambda_ gives the power in the Cressie-Read power divergence statistic."/>
209 </macro>
210 <macro name="macro_inclusive">
211 <param name="inclusive" type="boolean" truevalue="--inclusive" falsevalue="" checked="true" label="flag" help="These flags determine whether values exactly equal to the lower or upper limits are included" />
212 </macro>
213 <macro name="macro_p">
214 <param name="p" size="5" type="float" value="0.5" />
215 </macro>
216 <macro name="macro_inclusive1">
217 <param name="inclusive1" type="boolean" truevalue="--inclusive1" falsevalue="" checked="true" label="lower flag" help="These flags determine whether values exactly equal to the lower or upper limits are included" />
218 </macro>
219 <macro name="macro_inclusive2">
220 <param name="inclusive2" type="boolean" truevalue="--inclusive2" falsevalue="" checked="true" label="upper flag" help="These flags determine whether values exactly equal to the lower or upper limits are included" />
221 </macro>
<!-- duplicate definition of macro_inclusive removed; an identical macro is defined earlier in this <macros> section -->
225 </macros>
226 <command interpreter="python">
227 statistical_hypothesis_testing.py
228 --infile "${infile}"
229 --outfile "${outfile}"
230 --test_id "${test_methods.test_methods_opts}"
231 #if str($test_methods.test_methods_opts) == "describe" or str($test_methods.test_methods_opts) == "mode" or str($test_methods.test_methods_opts) == "normaltest" or str($test_methods.test_methods_opts) == "kurtosistest" or str($test_methods.test_methods_opts) == "skewtest" or str($test_methods.test_methods_opts) == "nanmean" or str($test_methods.test_methods_opts) == "nanmedian" or str($test_methods.test_methods_opts) == "variation" or str($test_methods.test_methods_opts) == "itemfreq" or str($test_methods.test_methods_opts) == "kurtosistest" or str($test_methods.test_methods_opts) == "skewtest" or str($test_methods.test_methods_opts) == "nanmean" or str($test_methods.test_methods_opts) == "nanmedian" or str($test_methods.test_methods_opts) == "variation" or str($test_methods.test_methods_opts) == "tiecorrect":
232 --sample_one_cols "${test_methods.sample_one_cols}"
233 #elif str($test_methods.test_methods_opts) == "gmean" or str($test_methods.test_methods_opts) == "hmean":
234 --sample_one_cols "${test_methods.sample_one_cols}"
235 --dtype "${test_methods.dtype}"
236 #elif str($test_methods.test_methods_opts) == "anderson":
237 --sample_one_cols "${test_methods.sample_one_cols}"
238 --dist "${test_methods.dist}"
239 #elif str($test_methods.test_methods_opts) == "binom_test":
240 --sample_one_cols "${test_methods.sample_one_cols}"
241 --n "${test_methods.n}"
242 --p "${test_methods.p}"
243 #elif str($test_methods.test_methods_opts) == "kurtosis":
244 --sample_one_cols "${test_methods.sample_one_cols}"
245 --axis "${test_methods.axis}"
246 $test_methods.fisher
247 $test_methods.bias
248 #elif str($test_methods.test_methods_opts) == "moment":
249 --sample_one_cols "${test_methods.sample_one_cols}"
250 --n "${test_methods.n}"
251 #elif str($test_methods.test_methods_opts) == "bayes_mvs":
252 --sample_one_cols "${test_methods.sample_one_cols}"
253 --alpha "${test_methods.alpha}"
254 #elif str($test_methods.test_methods_opts) == "percentileofscore":
255 --sample_one_cols "${test_methods.sample_one_cols}"
256 --score "${test_methods.score}"
257 --kind "${test_methods.kind}"
258 #elif str($test_methods.test_methods_opts) == "sigmaclip":
259 --sample_one_cols "${test_methods.sample_one_cols}"
260 --n "${test_methods.n}"
261 --m "${test_methods.m}"
262 #elif str($test_methods.test_methods_opts) == "chi2_contingency":
263 --sample_one_cols "${test_methods.sample_one_cols}"
264 $test_methods.correction
265 #if str($test_methods.lambda_).strip():
266 --lambda_ "${test_methods.lambda_}"
267 #end if
268 #elif str($test_methods.test_methods_opts) == "skew" or str($test_methods.test_methods_opts) == "nanstd" :
269 --sample_one_cols "${test_methods.sample_one_cols}"
270 $test_methods.bias
271 #elif str($test_methods.test_methods_opts) == "rankdata":
272 --sample_one_cols "${test_methods.sample_one_cols}"
273 --md "${test_methods.md}"
274 #elif str($test_methods.test_methods_opts) == "sem" or str($test_methods.test_methods_opts) == "zscore" or str($test_methods.test_methods_opts) == "signaltonoise":
275 --sample_one_cols "${test_methods.sample_one_cols}"
276 #if str($test_methods.ddof).strip():
277 --ddof "${test_methods.ddof}"
278 #end if
279 #elif str($test_methods.test_methods_opts) == "trimboth":
280 --sample_one_cols "${test_methods.sample_one_cols}"
281 #if str($test_methods.proportiontocut).strip():
282 --proportiontocut "${test_methods.proportiontocut}"
283 #end if
284 #elif str($test_methods.test_methods_opts) == "trim1":
285 --sample_one_cols "${test_methods.sample_one_cols}"
286 #if str($test_methods.proportiontocut).strip():
287 --proportiontocut "${test_methods.proportiontocut}"
288 #end if
289 --tail "${test_methods.tail}"
290 #elif str($test_methods.test_methods_opts) == "boxcox":
291 --sample_one_cols "${test_methods.sample_one_cols}"
292 --alpha "${test_methods.alpha}"
293 #if str($test_methods.imbda).strip():
294 --imbda "${test_methods.imbda}"
295 #end if
296 #elif str($test_methods.test_methods_opts) == "boxcox_llf":
297 --sample_one_cols "${test_methods.sample_one_cols}"
298 --imbda "${test_methods.imbda}"
299 #elif str($test_methods.test_methods_opts) == "kstest":
300 --sample_one_cols "${test_methods.sample_one_cols}"
301 #if str($test_methods.ni).strip():
302 --ni "${test_methods.ni}"
303 #end if
304 --cdf "${test_methods.cdf}"
305 --alternative "${test_methods.alternative}"
306 --mode "${test_methods.mode}"
307
308 #elif str($test_methods.test_methods_opts) == "boxcox_normmax":
309 --sample_one_cols "${test_methods.sample_one_cols}"
310 #if str($test_methods.mf).strip():
311 --mf "${test_methods.mf}"
312 #end if
313 #if str($test_methods.nf).strip():
314 --nf "${test_methods.nf}"
315 #end if
316 --method "${test_methods.method}"
317 #elif str($test_methods.test_methods_opts) == "tmean" or str($test_methods.test_methods_opts) == "tvar" or str($test_methods.test_methods_opts) == "tstd" or str($test_methods.test_methods_opts) == "tsem":
318 --sample_one_cols "${test_methods.sample_one_cols}"
319 #if str($test_methods.mf).strip():
320 --mf "${test_methods.mf}"
321 #end if
322 #if str($test_methods.nf).strip():
323 --nf "${test_methods.nf}"
324 #end if
325 $test_methods.inclusive1
326 $test_methods.inclusive2
327 #elif str($test_methods.test_methods_opts) == "tmin":
328 --sample_one_cols "${test_methods.sample_one_cols}"
329 #if str($test_methods.mf).strip():
330 --mf "${test_methods.mf}"
331 #end if
332 $test_methods.inclusive
333 #elif str($test_methods.test_methods_opts) == "tmax":
334 --sample_one_cols "${test_methods.sample_one_cols}"
335 #if str($test_methods.nf).strip():
336 --nf "${test_methods.nf}"
337 #end if
338 $test_methods.inclusive
339 #elif str($test_methods.test_methods_opts) == "histogram":
340 --sample_one_cols "${test_methods.sample_one_cols}"
341 #if str($test_methods.mf).strip():
342 --mf "${test_methods.mf}"
343 #end if
344 #if str($test_methods.nf).strip():
345 --nf "${test_methods.nf}"
346 #end if
347 --b "${test_methods.b}"
348 $test_methods.printextras
349 #elif str($test_methods.test_methods_opts) == "cumfreq":
350 --sample_one_cols "${test_methods.sample_one_cols}"
351 #if str($test_methods.mf).strip():
352 --mf "${test_methods.mf}"
353 #end if
354 #if str($test_methods.nf).strip():
355 --nf "${test_methods.nf}"
356 #end if
357 --b "${test_methods.b}"
358 #elif str($test_methods.test_methods_opts) == "threshold":
359 --sample_one_cols "${test_methods.sample_one_cols}"
360 #if str($test_methods.mf).strip():
361 --mf "${test_methods.mf}"
362 #end if
363 #if str($test_methods.nf).strip():
364 --nf "${test_methods.nf}"
365 #end if
366 --new "${test_methods.new}"
367 #elif str($test_methods.test_methods_opts) == "relfreq":
368 --sample_one_cols "${test_methods.sample_one_cols}"
369 #if str($test_methods.mf).strip():
370 --mf "${test_methods.mf}"
371 #end if
372 #if str($test_methods.nf).strip():
373 --nf "${test_methods.nf}"
374 #end if
375 --b "${test_methods.b}"
376 #elif str($test_methods.test_methods_opts) == "spearmanr":
377 --sample_one_cols "${test_methods.sample_one_cols}"
378 #if str($test_methods.sample_two_cols).strip():
379 --sample_two_cols "${test_methods.sample_two_cols}"
380 #end if
381 #elif str($test_methods.test_methods_opts) == "theilslopes":
382 --sample_one_cols "${test_methods.sample_one_cols}"
383 #if str($test_methods.sample_two_cols).strip():
384 --sample_two_cols "${test_methods.sample_two_cols}"
385 #end if
386 --alpha "${test_methods.alpha}"
387 #elif str($test_methods.test_methods_opts) == "chisquare":
388 --sample_one_cols "${test_methods.sample_one_cols}"
389 #if str($test_methods.sample_two_cols).strip():
390 --sample_two_cols "${test_methods.sample_two_cols}"
391 #end if
392 #if str($test_methods.ddof).strip():
393 --ddof "${test_methods.ddof}"
394 #end if
395 #elif str($test_methods.test_methods_opts) == "power_divergence":
396 --sample_one_cols "${test_methods.sample_one_cols}"
397 #if str($test_methods.sample_two_cols).strip():
398 --sample_two_cols "${test_methods.sample_two_cols}"
399 #end if
400 #if str($test_methods.ddof).strip():
401 --ddof "${test_methods.ddof}"
402 #end if
403 #if str($test_methods.lambda_).strip():
404 --lambda_ "${test_methods.lambda_}"
405 #end if
406 #elif str($test_methods.test_methods_opts) == "combine_pvalues":
407 --sample_one_cols "${test_methods.sample_one_cols}"
408 #if str($test_methods.sample_two_cols).strip() and $test_methods.sample_two_cols:
409 --sample_two_cols "${test_methods.sample_two_cols}"
410 #end if
411 --med "${test_methods.med}"
412 #elif str($test_methods.test_methods_opts) == "wilcoxon":
413 --sample_one_cols "${test_methods.sample_one_cols}"
414 #if str($test_methods.sample_two_cols).strip() and $test_methods.sample_two_cols:
415 --sample_two_cols "${test_methods.sample_two_cols}"
416 #end if
417 --zero_method "${test_methods.zero_method}"
418 $test_methods.correction
419 #elif str($test_methods.test_methods_opts) == "ranksums" or str($test_methods.test_methods_opts) == "ansari" or str($test_methods.test_methods_opts) == "linregress" or str($test_methods.test_methods_opts) == "pearsonr" or str($test_methods.test_methods_opts) == "pointbiserialr" or str($test_methods.test_methods_opts) == "ks_2samp" or str($test_methods.test_methods_opts) == "ttest_1samp" or str($test_methods.test_methods_opts) == "histogram2":
420 --sample_one_cols "${test_methods.sample_one_cols}"
421 --sample_two_cols "${test_methods.sample_two_cols}"
422 #elif str($test_methods.test_methods_opts) == "entropy":
423 --sample_one_cols "${test_methods.sample_one_cols}"
424 --sample_two_cols "${test_methods.sample_two_cols}"
425 --base "${test_methods.base}"
## kendalltau: two-sample rank correlation; the original template contained a
## second, unreachable #elif with the identical condition - removed.
426 #elif str($test_methods.test_methods_opts) == "kendalltau":
427 --sample_one_cols "${test_methods.sample_one_cols}"
428 --sample_two_cols "${test_methods.sample_two_cols}"
429 $test_methods.initial_lexsort
434 #elif str($test_methods.test_methods_opts) == "mannwhitneyu":
435 --sample_one_cols "${test_methods.sample_one_cols}"
436 --sample_two_cols "${test_methods.sample_two_cols}"
437 $test_methods.mwu_use_continuity
438 #elif str($test_methods.test_methods_opts) == "ttest_ind":
439 --sample_one_cols "${test_methods.sample_one_cols}"
440 --sample_two_cols "${test_methods.sample_two_cols}"
441 $test_methods.equal_var
442 #elif str($test_methods.test_methods_opts) == "ttest_rel":
443 --sample_one_cols "${test_methods.sample_one_cols}"
444 --sample_two_cols "${test_methods.sample_two_cols}"
445 --axis "${test_methods.axis}"
446 #elif str($test_methods.test_methods_opts) == "zmap":
447 --sample_one_cols "${test_methods.sample_one_cols}"
448 --sample_two_cols "${test_methods.sample_two_cols}"
449 #if str($test_methods.ddof).strip():
450 --ddof "${test_methods.ddof}"
451 #end if
452 #elif str($test_methods.test_methods_opts) == "binned_statistic":
453 --sample_one_cols "${test_methods.sample_one_cols}"
454 --sample_two_cols "${test_methods.sample_two_cols}"
455 #if str($test_methods.mf).strip():
456 --mf "${test_methods.mf}"
457 #end if
458 #if str($test_methods.nf).strip():
459 --nf "${test_methods.nf}"
460 #end if
461 --statistic "${test_methods.statistic}"
462 --b "${test_methods.b}"
463 #elif str($test_methods.test_methods_opts) == "scoreatpercentile":
464 --sample_one_cols "${test_methods.sample_one_cols}"
465 --sample_two_cols "${test_methods.sample_two_cols}"
466 #if str($test_methods.mf).strip():
467 --mf "${test_methods.mf}"
468 #end if
469 #if str($test_methods.nf).strip():
470 --nf "${test_methods.nf}"
471 #end if
472 --interpolation "${test_methods.interpolation}"
473 #elif str($test_methods.test_methods_opts) == "mood":
474 --axis "${test_methods.axis}"
475 --sample_one_cols "${test_methods.sample_one_cols}"
476 --sample_two_cols "${test_methods.sample_two_cols}"
477 #elif str($test_methods.test_methods_opts) == "shapiro":
478 $test_methods.reta
479 --sample_one_cols "${test_methods.sample_one_cols}"
480 --sample_two_cols "${test_methods.sample_two_cols}"
481 #elif str($test_methods.test_methods_opts) == "bartlett" or str($test_methods.test_methods_opts) == "f_oneway" or str($test_methods.test_methods_opts) == "kruskal" or str($test_methods.test_methods_opts) == "friedmanchisquare" or str($test_methods.test_methods_opts) == "obrientransform":
482 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#"
483 #elif str($test_methods.test_methods_opts) == "levene":
484 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#"
485 --center "${test_methods.center}"
486 #if str($test_methods.proportiontocut).strip():
487 --proportiontocut "${test_methods.proportiontocut}"
488 #end if
489 #elif str($test_methods.test_methods_opts) == "fligner":
490 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#"
491 --center "${test_methods.center}"
492 #if str($test_methods.proportiontocut).strip():
493 --proportiontocut "${test_methods.proportiontocut}"
494 #end if
495 #elif str($test_methods.test_methods_opts) == "median_test":
496 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#"
497 $test_methods.correction
498 #if str($test_methods.lambda_).strip():
499 --lambda_ "${test_methods.lambda_}"
500 #end if
501 --ties "${test_methods.ties}"
502 #end if
503 </command>
504 <inputs>
505 <param name="infile" type="data" format="tabular" label="Sample file" help="tabular file containing the observations"/>
506 <conditional name="test_methods">
507 <param name="test_methods_opts" type="select" label="Select a statistical test method">
508 <option value="describe">Computes several descriptive statistics of the passed array</option>
509 <option value="gmean">Compute the geometric mean along the specified axis</option>
510 <option value="hmean">Calculates the harmonic mean along the specified axis</option>
511 <option value="kurtosis">Computes the kurtosis (Fisher or Pearson) of a dataset</option>
512 <option value="kurtosistest">Tests whether a dataset has normal kurtosis</option>
513 <option value="mode">show the most common value in the passed array</option>
514 <option value="moment">Calculates the nth moment about the mean for a sample</option>
515 <option value="normaltest">Tests whether a sample differs from a normal distribution</option>
516 <option value="skew">Computes the skewness of a data set.</option>
517 <option value="skewtest">Tests whether the skew is different from the normal distribution.</option>
518 <option value="tmean">Compute the trimmed mean</option>
519 <option value="tvar">Compute the trimmed variance</option>
520 <option value="tmin">Compute the trimmed minimum</option>
521 <option value="tmax">Compute the trimmed maximum</option>
522 <option value="tstd">Compute the trimmed sample standard deviation</option>
523 <option value="tsem">Compute the trimmed standard error of the mean</option>
524 <option value="nanmean">Compute the mean ignoring nans</option>
525 <option value="nanstd">Compute the standard deviation ignoring nans</option>
526 <option value="nanmedian">Compute the median ignoring nan values.</option>
527 <option value="variation">Computes the coefficient of variation, the ratio of the biased standard deviation to the mean.</option>
528 <option value="cumfreq">Returns a cumulative frequency histogram, using the histogram function</option>
529 <option value="histogram2">Compute histogram using divisions in bins</option>
530 <option value="histogram">Separates the range into several bins</option>
531 <option value="itemfreq">Compute frequencies for each number</option>
532 <option value="percentileofscore">The percentile rank of a score relative to a list of scores</option>
533 <option value="scoreatpercentile">Calculate the score at a given percentile of the input sequence</option>
534 <option value="relfreq">Returns a relative frequency histogram, using the histogram function</option>
535 <option value="binned_statistic">Compute a binned statistic for a set of data</option>
536 <option value="obrientransform">Computes the O’Brien transform on input data</option>
537 <option value="signaltonoise">The signal-to-noise ratio of the input data</option>
538 <option value="bayes_mvs">Bayesian confidence intervals for the mean, var, and std</option>
539 <option value="sem">Calculates the standard error of the mean of the value</option>
540 <option value="zmap">Calculates the relative z-scores</option>
541 <option value="zscore">Calculates the z score of each value in the sample, relative to the sample mean and standard deviation</option>
542 <option value="sigmaclip">Iterative sigma-clipping of array elements</option>
543 <option value="threshold">Clip array to a given value</option>
544 <option value="trimboth">Slices off a proportion of items from both ends of an array</option>
545 <option value="trim1">Slices off a proportion of items from ONE end of the passed array distribution</option>
546 <option value="f_oneway">Performs a 1-way ANOVA</option>
547 <option value="pearsonr">Calculates a Pearson correlation coefficient and the p-value for testing non-correlation.</option>
548 <option value="spearmanr">Calculates a Spearman rank-order correlation coefficient and the p-value to test for non-correlation</option>
549 <option value="pointbiserialr">Calculates a point biserial correlation coefficient and the associated p-value</option>
550 <option value="kendalltau">Calculates Kendall’s tau, a correlation measure for ordinal data</option>
551 <option value="linregress">This computes a least-squares regression for two sets of measurements</option>
552 <option value="theilslopes">Computes the Theil-Sen estimator for a set of points (x, y)</option>
553 <option value="ttest_1samp">Calculates the T-test for the mean of ONE group of scores</option>
554 <option value="ttest_ind">T-test for the means of TWO INDEPENDENT samples of scores</option>
555 <option value="ttest_rel">T-test for the means of TWO RELATED samples of scores</option>
556 <option value="kstest">Perform the Kolmogorov-Smirnov test for goodness of fit.</option>
557 <option value="chisquare">Calculates a one-way chi square test</option>
558 <option value="power_divergence">Cressie-Read power divergence statistic and goodness of fit test</option>
559 <option value="ks_2samp">Computes the Kolmogorov-Smirnov statistic on 2 samples</option>
560 <option value="mannwhitneyu">Computes the Mann-Whitney rank test on samples x and y</option>
561 <option value="tiecorrect">Tie correction factor for ties in the Mann-Whitney U and Kruskal-Wallis H tests</option>
562 <option value="rankdata">Assign ranks to data, dealing with ties appropriately</option>
563 <option value="ranksums">Compute the Wilcoxon rank-sum statistic for two samples</option>
564 <option value="wilcoxon">Calculate the Wilcoxon signed-rank test</option>
565 <option value="kruskal">Compute the Kruskal-Wallis H-test for independent samples</option>
566 <option value="friedmanchisquare">Computes the Friedman test for repeated measurements</option>
567 <option value="combine_pvalues">Methods for combining the p-values of independent tests bearing upon the same hypothesis</option>
568 <option value="ansari">Perform the Ansari-Bradley test for equal scale parameters</option>
569 <option value="bartlett">Perform Bartlett’s test for equal variances</option>
570 <option value="levene">Perform Levene test for equal variances.</option>
571 <option value="shapiro">Perform the Shapiro-Wilk test for normality</option>
572 <option value="anderson">Anderson-Darling test for data coming from a particular distribution</option>
573 <option value="binom_test">Perform a test that the probability of success is p</option>
574 <option value="fligner">Perform Fligner’s test for equal variances</option>
575 <option value="median_test">Mood’s median test</option>
576 <option value="mood">Perform Mood’s test for equal scale parameters</option>
577 <option value="boxcox">Return a positive dataset transformed by a Box-Cox power transformation</option>
578 <option value="boxcox_normmax">Compute optimal Box-Cox transform parameter for input data</option>
579 <option value="boxcox_llf">The boxcox log-likelihood function</option>
<!-- duplicate <option value="boxcox"> removed: the same option appeared twice in this select list -->
581 <option value="entropy">Calculate the entropy of a distribution for given probability values</option>
582 <option value="chi2_contingency">Chi-square test of independence of variables in a contingency table</option>
583 </param>
584 <when value="itemfreq">
585 <expand macro="macro_sample_one_cols"/>
586 </when>
587 <when value="sem">
588 <expand macro="macro_sample_one_cols"/>
589 <expand macro="macro_ddof"/>
590 </when>
591 <when value="zscore">
592 <expand macro="macro_sample_one_cols"/>
593 <expand macro="macro_ddof"/>
594 </when>
595 <when value="relfreq">
596 <expand macro="macro_sample_one_cols"/>
597 <expand macro="macro_mf"/>
598 <expand macro="macro_nf"/>
599 <expand macro="macro_b"/>
600 </when>
601 <when value="signaltonoise">
602 <expand macro="macro_sample_one_cols"/>
603 <expand macro="macro_ddof"/>
604 </when>
605 <when value="bayes_mvs">
606 <expand macro="macro_sample_one_cols"/>
607 <expand macro="macro_alpha"/>
608 </when>
609 <when value="threshold">
610 <expand macro="macro_sample_one_cols"/>
611 <expand macro="macro_mf"/>
612 <expand macro="macro_nf"/>
613 <expand macro="macro_new"/>
614 </when>
615 <when value="trimboth">
616 <expand macro="macro_sample_one_cols"/>
617 <expand macro="macro_proportiontocut"/>
618 </when>
619 <when value="trim1">
620 <expand macro="macro_sample_one_cols"/>
621 <expand macro="macro_proportiontocut"/>
622 <expand macro="macro_tail"/>
623 </when>
624 <when value="percentileofscore">
625 <expand macro="macro_sample_one_cols"/>
626 <expand macro="macro_score"/>
627 <expand macro="macro_kind"/>
628 </when>
629 <when value="normaltest">
630 <expand macro="macro_sample_one_cols"/>
631 </when>
632 <when value="kurtosistest">
633 <expand macro="macro_sample_one_cols"/>
634 </when>
635 <when value="describe">
636 <expand macro="macro_sample_one_cols"/>
637 </when>
638 <when value="mode">
639 <expand macro="macro_sample_one_cols"/>
640 </when>
<!-- duplicate <when value="normaltest"> and <when value="kurtosistest"> blocks removed:
     identical blocks are already declared earlier in this conditional, and a conditional
     may only have one <when> per value -->
647 <when value="skewtest">
648 <expand macro="macro_sample_one_cols"/>
649 </when>
650 <when value="nanmean">
651 <expand macro="macro_sample_one_cols"/>
652 </when>
653 <when value="nanmedian">
654 <expand macro="macro_sample_one_cols"/>
655 </when>
656 <when value="variation">
657 <expand macro="macro_sample_one_cols"/>
658 </when>
659 <when value="tiecorrect">
660 <expand macro="macro_sample_one_cols"/>
661 </when>
662 <when value="gmean">
663 <expand macro="macro_sample_one_cols"/>
664 <expand macro="macro_dtype"/>
665 </when>
666 <when value="hmean">
667 <expand macro="macro_sample_one_cols"/>
668 <expand macro="macro_dtype"/>
669 </when>
670 <when value="sigmaclip">
671 <expand macro="macro_sample_one_cols"/>
672 <expand macro="macro_m"/>
673 <expand macro="macro_n_in"/>
674 </when>
675 <when value="kurtosis">
676 <expand macro="macro_sample_one_cols"/>
677 <expand macro="macro_axis"/>
678 <expand macro="macro_fisher"/>
679 <expand macro="macro_bias"/>
680 </when>
681 <when value="chi2_contingency">
682 <expand macro="macro_sample_one_cols"/>
683 <expand macro="macro_correction"/>
684 <expand macro="macro_lambda_"/>
685 </when>
686 <when value="binom_test">
687 <expand macro="macro_sample_one_cols"/>
688 <expand macro="macro_n_in"/>
689 <expand macro="macro_p"/>
690 </when>
691 <when value="moment">
692 <expand macro="macro_sample_one_cols"/>
693 <expand macro="macro_n_moment"/>
694 </when>
695 <when value="skew">
696 <expand macro="macro_sample_one_cols"/>
697 <expand macro="macro_bias"/>
698 </when>
699 <when value="tmean">
700 <expand macro="macro_sample_one_cols"/>
701 <expand macro="macro_mf"/>
702 <expand macro="macro_nf"/>
703 <expand macro="macro_inclusive1"/>
704 <expand macro="macro_inclusive2"/>
705 </when>
706 <when value="tmin">
707 <expand macro="macro_sample_one_cols"/>
708 <expand macro="macro_mf"/>
709 <expand macro="macro_inclusive"/>
710 </when>
711 <when value="tmax">
712 <expand macro="macro_sample_one_cols"/>
713 <expand macro="macro_nf"/>
714 <expand macro="macro_inclusive"/>
715 </when>
716 <when value="tvar">
717 <expand macro="macro_sample_one_cols"/>
718 <expand macro="macro_mf"/>
719 <expand macro="macro_nf"/>
720 <expand macro="macro_inclusive1"/>
721 <expand macro="macro_inclusive2"/>
722 </when>
723 <when value="tstd">
724 <expand macro="macro_sample_one_cols"/>
725 <expand macro="macro_mf"/>
726 <expand macro="macro_nf"/>
727 <expand macro="macro_inclusive1"/>
728 <expand macro="macro_inclusive2"/>
729 </when>
730 <when value="tsem">
731 <expand macro="macro_sample_one_cols"/>
732 <expand macro="macro_mf"/>
733 <expand macro="macro_nf"/>
734 <expand macro="macro_inclusive1"/>
735 <expand macro="macro_inclusive2"/>
736 </when>
737 <when value="nanstd">
738 <expand macro="macro_sample_one_cols"/>
739 <expand macro="macro_bias"/>
740 </when>
741 <when value="histogram">
742 <expand macro="macro_sample_one_cols"/>
743 <expand macro="macro_mf"/>
744 <expand macro="macro_nf"/>
745 <expand macro="macro_b"/>
746 <expand macro="macro_printextras"/>
747
748 </when>
749 <when value="cumfreq">
750 <expand macro="macro_sample_one_cols"/>
751 <expand macro="macro_mf"/>
752 <expand macro="macro_nf"/>
753 <expand macro="macro_b"/>
754 </when>
755 <when value="boxcox">
756 <expand macro="macro_sample_one_cols"/>
757 <expand macro="macro_imbda"/>
758 <expand macro="macro_alpha"/>
759 </when>
760 <when value="boxcox_llf">
761 <expand macro="macro_sample_one_cols"/>
762 <expand macro="macro_imbda"/>
763 </when>
764 <when value="boxcox_normmax">
765 <expand macro="macro_sample_one_cols"/>
766 <expand macro="macro_mf"/>
767 <expand macro="macro_nf"/>
768 <expand macro="macro_method"/>
769 </when>
770 <when value="anderson">
771 <expand macro="macro_sample_one_cols"/>
772 <expand macro="macro_dist"/>
773 </when>
774 <when value="rankdata">
775 <expand macro="macro_sample_one_cols"/>
776 <expand macro="macro_md"/>
777 </when>
778 <when value="kstest">
779 <expand macro="macro_sample_one_cols"/>
780 <expand macro="macro_cdf"/>
781 <expand macro="macro_ni"/>
782 <expand macro="macro_alternative"/>
783 <expand macro="macro_mode"/>
784 </when>
785
786 <when value="spearmanr">
787 <expand macro="macro_sample_one_cols"/>
788 <expand macro="macro_sample_two_cols"/>
789 </when>
790 <when value="ranksums">
791 <expand macro="macro_sample_one_cols"/>
792 <expand macro="macro_sample_two_cols"/>
793 </when>
794 <when value="ansari">
795 <expand macro="macro_sample_one_cols"/>
796 <expand macro="macro_sample_two_cols"/>
797 </when>
798 <when value="linregress">
799 <expand macro="macro_sample_one_cols"/>
800 <expand macro="macro_sample_two_cols"/>
801 </when>
802 <when value="histogram2">
803 <expand macro="macro_sample_one_cols"/>
804 <expand macro="macro_sample_two_cols"/>
805 </when>
806 <when value="pearsonr">
807 <expand macro="macro_sample_one_cols"/>
808 <expand macro="macro_sample_two_cols"/>
809 </when>
810 <when value="pointbiserialr">
811 <expand macro="macro_sample_one_cols"/>
812 <expand macro="macro_sample_two_cols"/>
813 </when>
814 <when value="ttest_1samp">
815 <expand macro="macro_sample_one_cols"/>
816 <expand macro="macro_sample_two_cols"/>
817 </when>
818 <when value="ks_2samp">
819 <expand macro="macro_sample_one_cols"/>
820 <expand macro="macro_sample_two_cols"/>
821 </when>
822 <when value="kendalltau">
823 <expand macro="macro_sample_one_cols"/>
824 <expand macro="macro_sample_two_cols"/>
825 <expand macro="macro_initial_lexsort"/>
826
827 </when>
828 <when value="mannwhitneyu">
829 <expand macro="macro_sample_one_cols"/>
830 <expand macro="macro_sample_two_cols"/>
831 <expand macro="macro_mwu_use_continuity"/>
832 </when>
833 <when value="ttest_ind">
834 <expand macro="macro_sample_one_cols"/>
835 <expand macro="macro_sample_two_cols"/>
836 <expand macro="macro_equal_var"/>
837 </when>
838 <when value="ttest_rel">
839 <expand macro="macro_sample_one_cols"/>
840 <expand macro="macro_sample_two_cols"/>
841 <expand macro="macro_axis"/>
842 </when>
843 <when value="entropy">
844 <expand macro="macro_sample_one_cols"/>
845 <expand macro="macro_sample_two_cols"/>
846 <expand macro="macro_base"/>
847 </when>
848 <when value="theilslopes">
849 <expand macro="macro_sample_one_cols"/>
850 <expand macro="macro_sample_two_cols"/>
851 <expand macro="macro_alpha"/>
852 </when>
853 <when value="zmap">
854 <expand macro="macro_sample_one_cols"/>
855 <expand macro="macro_sample_two_cols"/>
856 <expand macro="macro_ddof"/>
857 </when>
858 <when value="chisquare">
859 <expand macro="macro_sample_one_cols"/>
860 <expand macro="macro_sample_two_cols"/>
861 <expand macro="macro_ddof"/>
862 </when>
863 <when value="power_divergence">
864 <expand macro="macro_sample_one_cols"/>
865 <expand macro="macro_sample_two_cols"/>
866 <expand macro="macro_lambda_"/>
867 <expand macro="macro_ddof"/>
868 </when>
869 <when value="combine_pvalues">
870 <expand macro="macro_sample_one_cols"/>
871 <expand macro="macro_sample_two_cols"/>
872 <expand macro="macro_med"/>
873 </when>
874 <when value="mood">
875 <expand macro="macro_sample_one_cols"/>
876 <expand macro="macro_sample_two_cols"/>
877 <expand macro="macro_axis"/>
878 </when>
879 <when value="shapiro">
880 <expand macro="macro_sample_one_cols"/>
881 <expand macro="macro_sample_two_cols"/>
882 <expand macro="macro_reta"/>
883 </when>
884 <when value="wilcoxon">
885 <expand macro="macro_sample_one_cols"/>
886 <expand macro="macro_sample_two_cols"/>
887 <expand macro="macro_zero_method"/>
888 <expand macro="macro_correction"/>
889 </when>
890 <when value="scoreatpercentile">
891 <expand macro="macro_sample_one_cols"/>
892 <expand macro="macro_sample_two_cols"/>
893 <expand macro="macro_mf"/>
894 <expand macro="macro_nf"/>
895 <expand macro="macro_interpolation"/>
896 </when>
897 <when value="binned_statistic">
898 <expand macro="macro_sample_one_cols"/>
899 <expand macro="macro_sample_two_cols"/>
900 <expand macro="macro_mf"/>
901 <expand macro="macro_nf"/>
902 <expand macro="macro_b"/>
903 <expand macro="macro_statistic"/>
904 </when>
905 <when value="fligner">
906 <expand macro="macro_proportiontocut"/>
907 <expand macro="macro_center"/>
908 <expand macro="macro_sample_cols_min2"/>
909 </when>
910 <when value="f_oneway">
911 <expand macro="macro_sample_cols_min2"/>
912 </when>
913 <when value="kruskal">
914 <expand macro="macro_sample_cols_min2"/>
915 </when>
916 <when value="friedmanchisquare">
917 <expand macro="macro_sample_cols_min3"/>
918 </when>
919 <when value="bartlett">
920 <expand macro="macro_sample_cols_min2"/>
921 </when>
922 <when value="levene">
923 <expand macro="macro_proportiontocut"/>
924 <expand macro="macro_center"/>
925 <expand macro="macro_sample_cols_min2"/>
926 </when>
927 <when value="obrientransform">
928 <expand macro="macro_sample_cols_min2"/>
929 </when>
930 <when value="median_test">
931 <expand macro="macro_ties"/>
932 <expand macro="macro_correction"/>
933 <expand macro="macro_lambda_"/>
934 <expand macro="macro_sample_cols_min2"/>
935 </when>
936 </conditional>
937 </inputs>
938 <outputs>
<!-- single tabular output holding the result of whichever statistical method was selected -->
939 <data format="tabular" name="outfile" label="${tool.name} on ${on_string}" />
940 </outputs>
941 <tests>
942 <test>
943 <param name="infile" value="input.tabular"/>
944 <output name="outfile" file="boxcox_normmax2.tabular"/>
945 <param name="sample_one_cols" value="1,2,3,4"/>
946 <param name="test_methods_opts" value="boxcox_normmax"/>
947 <param name="method" value="pearsonr"/>
948 <param name="mf" value="-2.0"/>
949 <param name="nf" value="2.0"/>
950 </test>
951 <test>
952 <param name="infile" value="input.tabular"/>
953 <output name="outfile" file="normaltest.tabular"/>
954 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24"/>
955 <param name="test_methods_opts" value="normaltest"/>
956 </test>
957 <test>
958 <param name="infile" value="input.tabular"/>
959 <output name="outfile" file="tmin.tabular"/>
960 <param name="sample_one_cols" value="1,2,3,4,5,6"/>
961 <param name="test_methods_opts" value="tmin"/>
962 <param name="mf" value="10.0"/>
963 <param name="inclusive" value="True"/>
964 </test>
965 <test>
966 <param name="infile" value="input.tabular"/>
967 <output name="outfile" file="shapiro2.tabular"/>
968 <param name="sample_one_cols" value="1,2,3,4,8,9"/>
969 <param name="sample_two_cols" value="5,6,7"/>
970 <param name="test_methods_opts" value="shapiro"/>
971 <param name="reta" value="True"/>
972 </test>
973 <test>
974 <param name="infile" value="input.tabular"/>
975 <output name="outfile" file="obrientransform.tabular"/>
976 <repeat name="samples">
977 <param name="sample_cols" value="1,2,3,4"/>
978 </repeat>
979 <repeat name="samples">
980 <param name="sample_cols" value="5,6,7,8"/>
981 </repeat>
982 <param name="test_methods_opts" value="obrientransform"/>
983 </test>
984 <test>
985 <param name="infile" value="input.tabular"/>
986 <output name="outfile" file="median_test_result1.tabular"/>
987 <repeat name="samples">
988 <param name="sample_cols" value="1,2,3,4"/>
989 </repeat>
990 <repeat name="samples">
991 <param name="sample_cols" value="5,6,7,8"/>
992 </repeat>
993 <repeat name="samples">
994 <param name="sample_cols" value="9,10,11,12"/>
995 </repeat>
996 <param name="test_methods_opts" value="median_test"/>
997 <param name="ties" value="above"/>
998 <param name="correction" value="True"/>
999 <param name="lambda_" value="1"/>
1000 </test>
1001 <test>
1002 <param name="infile" value="input.tabular"/>
1003 <output name="outfile" file="wilcoxon_result1.tabular"/>
1004 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10"/>
1005 <param name="sample_two_cols" value="11,12,13,14,15,16,17,18,19,20"/>
1006 <param name="test_methods_opts" value="wilcoxon"/>
1007 <param name="zero_method" value="pratt"/>
1008 <param name="correction" value="False"/>
1009 </test>
1010 <test>
1011 <param name="infile" value="input.tabular"/>
1012 <output name="outfile" file="percentileofscore1.tabular"/>
1013 <param name="sample_one_cols" value="1,2,3,4"/>
1014 <param name="sample_two_cols" value="5,6,7,8"/>
1015 <param name="test_methods_opts" value="percentileofscore"/>
1016 <param name="score" value="1"/>
1017 <param name="kind" value="rank"/>
1018 </test>
1019 <test>
1020 <param name="infile" value="input.tabular"/>
1021 <output name="outfile" file="percentileofscore2.tabular"/>
1022 <param name="sample_one_cols" value="1,2,3,4"/>
1023 <param name="sample_two_cols" value="5,6,7,8"/>
1024 <param name="test_methods_opts" value="percentileofscore"/>
1025 <param name="score" value="2"/>
1026 <param name="kind" value="mean"/>
1027 </test>
1028 <test>
1029 <param name="infile" value="input.tabular"/>
1030 <output name="outfile" file="trim1.tabular"/>
1031 <param name="sample_one_cols" value="1,2,3,4,5,6"/>
1032 <param name="test_methods_opts" value="trim1"/>
1033 <param name="tail" value="left"/>
1034 <param name="proportiontocut" value="1.0"/>
1035 </test>
1036 <test>
1037 <param name="infile" value="input.tabular"/>
1038 <output name="outfile" file="scoreatpercentile.tabular"/>
1039 <param name="sample_one_cols" value="1,2,3,4"/>
1040 <param name="sample_two_cols" value="11,12,13,14"/>
1041 <param name="test_methods_opts" value="scoreatpercentile"/>
1042 <param name="mf" value="5.0"/>
1043 <param name="nf" value="50.0"/>
1044 <param name="interpolation" value="lower"/>
1045 </test>
1046 <test>
1047 <param name="infile" value="input.tabular"/>
1048 <output name="outfile" file="anderson.tabular"/>
1049 <param name="sample_one_cols" value="1,2,3,4"/>
1050 <param name="test_methods_opts" value="anderson"/>
1051 <param name="dist" value="expon"/>
1052 </test>
1053 <test>
1054 <param name="infile" value="input.tabular"/>
1055 <output name="outfile" file="boxcox_normmax.tabular"/>
1056 <param name="sample_one_cols" value="1,2,3,4"/>
1057 <param name="test_methods_opts" value="boxcox_normmax"/>
1058 <param name="method" value="mle"/>
1059 <param name="mf" value="-3.0"/>
1060 <param name="nf" value="3.0"/>
1061 </test>
1062 <test>
1063 <param name="infile" value="input.tabular"/>
1064 <output name="outfile" file="f_oneway.tabular"/>
1065 <repeat name="samples">
1066 <param name="sample_cols" value="1,2,3,4"/>
1067 </repeat>
1068 <repeat name="samples">
1069 <param name="sample_cols" value="5,6,7,8"/>
1070 </repeat>
1071 <param name="test_methods_opts" value="f_oneway"/>
1072 </test>
1073 <test>
1074 <param name="infile" value="input.tabular"/>
1075 <output name="outfile" file="shapiro.tabular"/>
1076 <param name="sample_one_cols" value="1,2,3,4"/>
1077 <param name="sample_two_cols" value="5,6"/>
1078 <param name="test_methods_opts" value="shapiro"/>
1079 <param name="reta" value="True"/>
1080 </test>
1081 <test>
1082 <param name="infile" value="input.tabular"/>
1083 <output name="outfile" file="power_divergence.tabular"/>
1084 <param name="sample_one_cols" value="1,2,3,4"/>
1085 <param name="sample_two_cols" value="5,6,7,8"/>
1086 <param name="test_methods_opts" value="power_divergence"/>
1087 <param name="ddof" value="1"/>
1088 <param name="lambda_" value="1"/>
1089 </test>
1090 <test>
1091 <param name="infile" value="input.tabular"/>
1092 <output name="outfile" file="itemfreq.tabular"/>
1093 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10"/>
1094 <param name="test_methods_opts" value="itemfreq"/>
1095 </test>
1096 <test>
1097 <param name="infile" value="input.tabular"/>
1098 <output name="outfile" file="trimboth.tabular"/>
1099 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10"/>
1100 <param name="proportiontocut" value="0"/>
1101 <param name="test_methods_opts" value="trimboth"/>
1102 </test>
1103 <test>
1104 <param name="infile" value="input.tabular"/>
1105 <output name="outfile" file="tmean.tabular"/>
1106 <param name="sample_one_cols" value="1,2,3,4,5,6"/>
1107 <param name="test_methods_opts" value="tmean"/>
1108 <param name="mf" value="0"/>
1109 <param name="nf" value="50"/>
1110 <param name="inclusive1" value="True"/>
1111 <param name="inclusive2" value="True"/>
1112 </test>
1113 <test>
1114 <param name="infile" value="input.tabular"/>
1115 <output name="outfile" file="tvar.tabular"/>
1116 <param name="sample_one_cols" value="1,2,3,4,5,6"/>
1117 <param name="test_methods_opts" value="tvar"/>
1118 <param name="mf" value="0"/>
1119 <param name="nf" value="50"/>
1120 <param name="inclusive1" value="True"/>
1121 <param name="inclusive2" value="True"/>
1122 </test>
1123 </tests>
1124 <help>
1125
1126 .. class:: warningmark
1127
1128
1129 Computes a large number of probability distributions as well as statistical functions of any kind.
1130 For more information have a look at the `SciPy site`_.
1131
1132 .. _`SciPy site`: http://docs.scipy.org/doc/scipy/reference/stats.html
1133
1134
1135 -----
1136
1137 ========
1138 Describe
1139 ========
1140
1141 Computes several descriptive statistics for samples x
1142
1143 -----
1144
1145 **The output are:**
1146
1147 size of the data : int
1148
1149 length of data along axis
1150
1151 (min, max): tuple of ndarrays or floats
1152
1153 minimum and maximum value of data array
1154
1155 arithmetic mean : ndarray or float
1156
1157 mean of data along axis
1158
1159 unbiased variance : ndarray or float
1160
1161 variance of the data along axis, denominator is number of observations minus one.
1162
1163 biased skewness : ndarray or float
1164
1165 skewness, based on moment calculations with denominator equal to the number of observations, i.e. no degrees of freedom correction
1166
1167 biased kurtosis : ndarray or float
1168
1169 kurtosis (Fisher), the kurtosis is normalized so that it is zero for the normal distribution. No degrees of freedom or bias correction is used.
1170
1171 **example**:
1172
1173 describe([4,417,8,3]) the result is (4,(3.0, 417.0),108.0,42440.6666667 ,1.15432044278, -0.666961688151)
1174
1175
1176 =====
1177 Gmean
1178 =====
1179
1180 Compute the geometric mean along the specified axis.
1181
1182 Returns the geometric average of the array elements. That is: n-th root of (x1 * x2 * ... * xn)
1183
1184 -----
1185
1186 **The output are:**
1187
1188 gmean : ndarray
1189
1190 see dtype parameter above
1191
1192 **example**:
1193
1194 stats.gmean([4,17,8,3],dtype='float64') the result is (6.35594365562)
1195
1196 =====
1197 Hmean
1198 =====
1199
1200 scipy.stats.hmean(a, axis=0, dtype=None)
1201 Calculates the harmonic mean along the specified axis.
1202
1203 That is: n / (1/x1 + 1/x2 + ... + 1/xn)
1204
1205 **The output are:**
1206
1207 hmean : ndarray
1208
1209 see dtype parameter above
1210
1211
1212 **example**:
1213
1214 stats.hmean([4,17,8,3],dtype='float64') the result is (5.21405750799)
1215
1216 ========
1217 Kurtosis
1218 ========
1219
1220 Computes the kurtosis (Fisher or Pearson) of a dataset.
1221
1222 Kurtosis is the fourth central moment divided by the square of the variance. If Fisher’s definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution.
1223
1224 If bias is False then the kurtosis is calculated using k statistics to eliminate bias coming from biased moment estimators
1225
1226 -----
1227
1228 Computes the kurtosis for samples x .
1229
1230 **The output are:**
1231
1232 kurtosis : array
1233
1234 The kurtosis of values along an axis. If all values are equal, return -3 for Fisher’s definition and 0 for Pearson’s definition.
1235
1236 **example**:
1237
1238 kurtosis([4,417,8,3],0,true,true) the result is (-0.666961688151)
1239
1240 =============
1241 Kurtosis Test
1242 =============
1243
1244 Tests whether a dataset has normal kurtosis
1245
1246 This function tests the null hypothesis that the kurtosis of the population from which the sample was drawn is that of the normal distribution: kurtosis = 3(n-1)/(n+1).
1247
1248 -----
1249
1250 Computes the Z-value and p-value about samples x.
1251
1252 kurtosistest only valid for n>=20.
1253
1254 **The output are:**
1255
1256 z-score : float
1257
1258 The computed z-score for this test
1259
1260 p-value : float
1261
1262 The 2-sided p-value for the hypothesis test
1263
1264
1265 **example**:
1266
1267 kurtosistest([4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3]) the result is (0.29775013081425117, 0.7658938788569033)
1268
1269 ====
1270 Mode
1271 ====
1272
1273 Returns an array of the modal value in the passed array.
1274
1275 If there is more than one such value, only the first is returned. The bin-count for the modal bins is also returned.
1276
1277 -----
1278
1279 Computes the most common value for samples x .
1280
1281 **The output are:**
1282
1283 vals : ndarray
1284
1285 Array of modal values.
1286
1287 counts : ndarray
1288
1289 Array of counts for each mode.
1290
1291
1292 **example**:
1293
1294 mode([4,417,8,3]) the result is ([ 3.], [ 1.])
1295
1296 ======
1297 Moment
1298 ======
1299
1300 Calculates the nth moment about the mean for a sample.
1301
1302 Generally used to calculate coefficients of skewness and kurtosis.
1303
1304 -----
1305
1306 Computes the nth moment about the mean for samples x .
1307
1308 **The output are:**
1309
1310 n-th central moment : ndarray or float
1311
1312 The appropriate moment along the given axis or over all values if axis is None. The denominator for the moment calculation is the number of observations, no degrees of freedom correction is done.
1313
1314
1315 **example**:
1316
1317 moment([4,417,8,3],moment=2) the result is (31830.5)
1318
1319
1320 ===========
1321 Normal Test
1322 ===========
1323
1324 Tests whether a sample differs from a normal distribution.
1325
1326 This function tests the null hypothesis that a sample comes from a normal distribution. It is based on D’Agostino and Pearson’s test that combines skew and kurtosis to produce an omnibus test of normality.
1327
1328 -----
1329
1330 Computes the k2 and p-value for samples x.
1331
1332 skewtest is not valid with less than 8 samples; kurtosistest is only valid for n>=20.
1333
1334 **The output are:**
1335
1336 k2 : float or array
1337
1338 s^2 + k^2, where s is the z-score returned by skewtest and k is the z-score returned by kurtosistest.
1339
1340 p-value : float or array
1341
1342 A 2-sided chi squared probability for the hypothesis test.
1343
1344
1345 **example**:
1346
1347 normaltest([4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3]) the result is (5.8877986151838, 0.052659990380181286)
1348
1349 ====
1350 Skew
1351 ====
1352
1353 Computes the skewness of a data set.
1354
1355 For normally distributed data, the skewness should be about 0. A skewness value > 0 means that there is more weight in the right tail of the distribution. The function skewtest can be used to determine if the skewness value is close enough to 0, statistically speaking.
1356
1357 -----
1358
1359 Computes the skewness from samples x.
1360
1361
1362 **The output are:**
1363
1364 skewness : ndarray
1365
1366 The skewness of values along an axis, returning 0 where all values are equal.
1367
1368
1369 **example**:
1370
1371 skew([4,417,8,3]) the result is (1.1543204427775307)
1372
1373
1374 =========
1375 Skew Test
1376 =========
1377
1378 Tests whether the skew is different from the normal distribution.
1379
1380 This function tests the null hypothesis that the skewness of the population that the sample was drawn from is the same as that of a corresponding normal distribution.
1381
1382 -----
1383
1384 Computes the z-value and p-value from samples x.
1385
1386 skewtest is not valid with less than 8 samples
1387
1388 **The output are:**
1389
1390 z-score : float
1391
1392 The computed z-score for this test.
1393
1394 p-value : float
1395
1396 a 2-sided p-value for the hypothesis test
1397
1398 **example**:
1399
1400 skewtest([4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3]) the result is (2.40814108282,0.0160339834731)
1401
1402 ======
1403 tmean
1404 ======
1405
1406 Compute the trimmed mean.
1407
1408 This function finds the arithmetic mean of given values, ignoring values outside the given limits.
1409
1410 -----
1411
1412 Computes the mean of samples x,considering the lower and higher limits.
1413
1414 Values in the input array less than the lower limit or greater than the upper limit will be ignored
1415
1416 for inclusive,These flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True)
1417
1418 **The output are:**
1419
1420 tmean : float
1421
1422 The computed mean for this test.
1423
1424
1425 **example**:
1426
1427 tmean([4,17,8,3],(0,20),(true,true)) the result is (8.0)
1428
1429 =====
1430 tvar
1431 =====
1432
1433 Compute the trimmed variance
1434
1435 This function computes the sample variance of an array of values, while ignoring values which are outside of given limits
1436
1437 -----
1438
1439 Computes the variance of samples x,considering the lower and higher limits.
1440
1441 Values in the input array less than the lower limit or greater than the upper limit will be ignored
1442
1443 for inclusive,These flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True)
1444
1445 **The output are:**
1446
1447 tvar : float
1448
1449 The computed variance for this test.
1450
1451
1452 **example**:
1453
1454 tvar([4,17,8,3],(0,99999),(true,true)) the result is (40.6666666667)
1455
1456 =====
1457 tmin
1458 =====
1459
1460 Compute the trimmed minimum.
1461
1462 This function finds the arithmetic minimum of given values, ignoring values outside the given limits.
1463
1464 -----
1465
1466 Compute the trimmed minimum
1467
1468 This function finds the minimum value of an array a along the specified axis, but only considering values greater than a specified lower limit.
1469
1470 **The output are:**
1471
1472 tmin : float
1473
1474 The computed min for this test.
1475
1476
1477 **example**:
1478
1479 stats.tmin([4,17,8,3],2,0,'true') the result is (3.0)
1480
1481 ============
1482 tmax
1483 ============
1484
1485 Compute the trimmed maximum.
1486
1487 This function finds the arithmetic maximum of given values, ignoring values outside the given limits.
1488
1489 This function computes the maximum value of an array along a given axis, while ignoring values larger than a specified upper limit.
1490
1491 **The output are:**
1492
1493 tmax : float
1494
1495 The computed max for this test.
1496
1497
1498 **example**:
1499
1500 stats.tmax([4,17,8,3],50,0,'true') the result is (17.0)
1501
1502 ============
1503 tstd
1504 ============
1505
1506 Compute the trimmed sample standard deviation
1507
1508 This function finds the sample standard deviation of given values, ignoring values outside the given limits.
1509
1510 -----
1511
1512 Computes the deviation of samples x,considering the lower and higher limits.
1513
1514 Values in the input array less than the lower limit or greater than the upper limit will be ignored
1515
1516 for inclusive,These flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True)
1517
1518 **The output are:**
1519
1520 tstd : float
1521
1522 The computed deviation for this test.
1523
1524
1525 **example**:
1526
1527 tstd([4,17,8,3],(0,99999),(true,true)) the result is (6.37704215657)
1528
1529
1530 ============
1531 tsem
1532 ============
1533
1534 Compute the trimmed standard error of the mean.
1535
1536 This function finds the standard error of the mean for given values, ignoring values outside the given limits.
1537
1538 -----
1539
1540 Computes the standard error of mean for samples x,considering the lower and higher limits.
1541
1542 Values in the input array less than the lower limit or greater than the upper limit will be ignored
1543
1544 for inclusive,These flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True)
1545
1546 **The output are:**
1547
1548 tsem : float
1549
1550 The computed the standard error of mean for this test.
1551
1552
1553 **example**:
1554
1555 tsem([4,17,8,3],(0,99999),(true,true)) the result is (3.18852107828)
1556
1557 ========
1558 nanmean
1559 ========
1560
1561 Compute the mean over the given axis ignoring nans
1562
1563 -----
1564
1565 Computes the mean for samples x without considering nans
1566
1567 **The output are:**
1568
1569 m : float
1570
1571 The computed mean for this test.
1572
1573
1574 **example**:
1575
1576 nanmean([4,17,8,3]) the result is (8.0)
1577
1578 =======
1579 nanstd
1580 =======
1581
1582 Compute the standard deviation over the given axis, ignoring nans.
1583
1584 -----
1585
1586 Computes the deviation for samples x without considering nans
1587
1588 **The output are:**
1589
1590 s : float
1591
1592 The computed standard deviation for this test.
1593
1594
1595 **example**:
1596
1597 nanstd([4,17,8,3],0,'false') the result is (5.52268050859)
1598
1599
1600 ============
1601 nanmedian
1602 ============
1603
1604 Computes the median for samples x without considering nans
1605
1606 **The output are:**
1607
1608 m : float
1609
1610 The computed median for this test.
1611
1612
1613 **example**:
1614
1615 nanmedian([4,17,8,3]) the result is (6.0)
1616
1617
1618 ============
1619 variation
1620 ============
1621
1622 Computes the coefficient of variation, the ratio of the biased standard deviation to the mean for samples x
1623
1624 **The output are:**
1625
1626 ratio: float
1627
1628 The ratio of the biased standard deviation to the mean for this test.
1629
1630
1631 **example**:
1632
1633 variation([4,17,8,3]) the result is (0.690335063574)
1634
1635 ============
1636 cumfreq
1637 ============
1638
1639 Returns a cumulative frequency histogram, using the histogram function.
1640
1641 **The output are:**
1642
1643 cumfreq : ndarray
1644
1645 Binned values of cumulative frequency.
1646
1647 lowerreallimit : float
1648
1649 Lower real limit
1650
1651 binsize : float
1652
1653 Width of each bin.
1654
1655 extrapoints : int
1656
1657 Extra points.
1658
1659
1660 **example**:
1661
1662 cumfreq([4,17,8,3],defaultreallimits=(2.0,3.5)) the result is ([ 0. 0. 0. 0. 0. 0. 1. 1. 1. 1.],2.0,0.15,3)
1663
1664 ==========
1665 histogram2
1666 ==========
1667
1668 Compute histogram using divisions in bins.
1669
1670 Count the number of times values from array a fall into numerical ranges defined by bins.
1671
1672 samples should at least have two numbers.
1673
1674 **The output are:**
1675
1676 histogram2 : ndarray of rank 1
1677
1678 Each value represents the occurrences for a given bin (range) of values.
1679
1680
1681 **example**:
1682
1683 stats.histogram2([4,17,8,3], [30,45,5,3]) the result is (array([ 0, -2, -2, 4]))
1684
1685 ============
1686 histogram
1687 ============
1688
1689 Separates the range into several bins and returns the number of instances in each bin
1690
1691 **The output are:**
1692
1693 histogram : ndarray
1694
1695 Number of points (or sum of weights) in each bin.
1696
1697 low_range : float
1698
1699 Lowest value of histogram, the lower limit of the first bin.
1700
1701 binsize : float
1702
1703 The size of the bins (all bins have the same size).
1704
1705 extrapoints : int
1706
1707 The number of points outside the range of the histogram.
1708
1709
1710 **example**:
1711
1712 histogram([4,17,8,3],defaultlimits=(2.0,3.4)) the result is ([ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.],2.0,0.14,3)
1713
1714
1715 ============
1716 itemfreq
1717 ============
1718
1719 Computes the frequencies for numbers
1720
1721 **The output are:**
1722
1723 itemfreq : (K, 2) ndarray
1724 A 2-D frequency table. Column 1 contains sorted, unique values from a, column 2 contains their respective counts.
1725
1726
1727 **example**:
1728
1729 itemfreq([4,17,8,3]) the result is array([[ 3, 1], [ 4, 1],[ 8, 1],[17, 1]])
1730
1731 ===
1732 Sem
1733 ===
1734
1735 Calculates the standard error of the mean (or standard error of measurement) of the values in the input array.
1736
1737
1738 **The output are:**
1739
1740 s : ndarray or float
1741 The standard error of the mean in the sample(s), along the input axis.
1742
1743
1744 **example**:
1745
1746 sem([4,17,8,3],ddof=1) the result is (3.18852107828)
1747
1748 =====
1749 Z Map
1750 =====
1751
1752 Calculates the relative z-scores.
1753
1754 Returns an array of z-scores, i.e., scores that are standardized to zero mean and unit variance, where mean and variance are calculated from the comparison array.
1755
1756
1757 **The output are:**
1758
1759 zscore : array_like
1760
1761 Z-scores, in the same shape as scores.
1762
1763 **example**:
1764
1765 stats.zmap([4,17,8,3],[30,45,5,3],ddof=1)the result is[-0.82496302 -0.18469321 -0.62795692 -0.87421454]
1766
1767 =======
1768 Z Score
1769 =======
1770
1771 Calculates the z score of each value in the sample, relative to the sample mean and standard deviation
1772
1773
1774 **The output are:**
1775
1776 zscore : array_like
1777 The z-scores, standardized by mean and standard deviation of input array a.
1778
1779
1780 **example**:
1781
1782 zscore([4,17,8,3],ddof=0) the result is ([-0.72428597 1.62964343 0. -0.90535746])
1783
1784 ===============
1785 Signal to noise
1786 ===============
1787
1788 The signal-to-noise ratio of the input data.
1789
1790 Returns the signal-to-noise ratio of a, here defined as the mean divided by the standard deviation.
1791
1792
1793 **The output are:**
1794
1795 s2n : ndarray
1796 The mean to standard deviation ratio(s) along axis, or 0 where the standard deviation is 0.
1797
1798
1799 **example**:
1800
1801 signaltonoise([4,17,8,3],ddof=0) the result is (1.44857193668)
1802
1803 ===================
1804 Percentile of score
1805 ===================
1806
1807 The percentile rank of a score relative to a list of scores.
1808
1809 A percentileofscore of, for example, 80% means that 80% of the scores in a are below the given score. In the case of gaps or ties, the exact definition depends on the optional keyword, kind.
1810
1811 **The output are:**
1812
1813 pcos : float
1814 Percentile-position of score (0-100) relative to a.
1815
1816
1817 **example**:
1818
1819 percentileofscore([4,17,8,3],score=3,kind='rank') the result is(25.0)
1820
1821 ===================
1822 Score at percentile
1823 ===================
1824
1825 Calculate the score at a given percentile of the input sequence.
1826
1827 For example, the score at per=50 is the median. If the desired quantile lies between two data points, we interpolate between them, according to the value of interpolation. If the parameter limit is provided, it should be a tuple (lower, upper) of two values.
1828
1829 The percentile value(s) given as the second argument should be in the range [0,100].
1830
1831 **The output are:**
1832
1833 score : float or ndarray
1834 Score at percentile(s).
1835
1836
1837 **example**:
1838
1839 stats.scoreatpercentile([4,17,8,3],[8,3],(0,100),'fraction') the result is array([ 3.24, 3.09])
1840
1841 =======
1842 relfreq
1843 =======
1844
1845 Returns a relative frequency histogram, using the histogram function
1846
1847 numbins are the number of bins to use for the histogram.
1848
1849 **The output are:**
1850
1851 relfreq : ndarray
1852
1853 Binned values of relative frequency.
1854
1855 lowerreallimit : float
1856
1857 Lower real limit
1858
1859 binsize : float
1860
1861 Width of each bin.
1862
1863 extrapoints : int
1864
1865 Extra points.
1866
1867
1868 **example**:
1869
1870 stats.relfreq([4,17,8,3],10,(0,100)) the result is (array([ 0.75, 0.25, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 ]), 0, 10.0, 0)
1871
1872 ================
1873 Binned statistic
1874 ================
1875
1876 Compute a binned statistic for a set of data.
1877
1878 This is a generalization of a histogram function. A histogram divides the space into bins, and returns the count of the number of points in each bin. This function allows the computation of the sum, mean, median, or other statistic of the values within each bin.
1879
1880 Y must be the same shape as X
1881
1882 **The output are:**
1883
1884 statistic : array
1885
1886 The values of the selected statistic in each bin.
1887
1888 bin_edges : array of dtype float
1889
1890 Return the bin edges (length(statistic)+1).
1891
1892 binnumber : 1-D ndarray of ints
1893
1894 This assigns to each observation an integer that represents the bin in which this observation falls. Array has the same length as values.
1895
1896
1897 **example**:
1898
1899 stats.binned_statistic([4,17,8,3],[30,45,5,3],'sum',10,(0,100)) the result is ([ 38. 45. 0. 0. 0. 0. 0. 0. 0. 0.],[ 0. 10. 20. 30. 40. 50. 60. 70. 80. 90. 100.],[1 2 1 1])
1900
1901 ================
1902 obrientransform
1903 ================
1904
1905 Computes the O’Brien transform on input data (any number of arrays).
1906
1907 Used to test for homogeneity of variance prior to running one-way stats.
1908
1909 It has to have at least two samples.
1910
1911 **The output are:**
1912
1913 obrientransform : ndarray
1914
1915 Transformed data for use in an ANOVA. The first dimension of the result corresponds to the sequence of transformed arrays. If the arrays given are all 1-D of the same length, the return value is a 2-D array; otherwise it is a 1-D array of type object, with each element being an ndarray.
1916
1917
1918 **example**:
1919
1920 stats.obrientransform([4,17,8,3], [30,45,5,3]) the result is (array([[ 16.5 , 124.83333333, -10.16666667, 31.5 ],[ 39.54166667, 877.04166667, 310.375 , 422.04166667]]))
1921
1922 =========
1923 bayes mvs
1924 =========
1925
1926 Bayesian confidence intervals for the mean, var, and std. alpha should be larger than 0 and smaller than 1.
1927
1928
1929 **The output are:**
1930
1931 mean_cntr, var_cntr, std_cntr : tuple
1932
1933 The three results are for the mean, variance and standard deviation, respectively. Each result is a tuple of the form:
1934
1935 (center, (lower, upper))
1936
1937 with center the mean of the conditional pdf of the value given the data, and (lower, upper) a confidence interval, centered on the median, containing the estimate to a probability alpha.
1938
1939 **example**:
1940
1941 stats.bayes_mvs([4,17,8,3],0.8) the result is (8.0, (0.49625108326958145, 15.503748916730416));(122.0, (15.611548029617781, 346.74229584218108));(8.8129230241075476, (3.9511451542075475, 18.621017583423871))
1942
1943 =========
1944 sigmaclip
1945 =========
1946
1947 Iterative sigma-clipping of array elements.
1948
1949 The output array contains only those elements of the input array c that satisfy the conditions
1950
1951 **The output are:**
1952
1953 c : ndarray
1954 Input array with clipped elements removed.
1955 critlower : float
1956 Lower threshold value use for clipping.
1957 critlupper : float
1958 Upper threshold value use for clipping.
1959
1960
1961 **example**:
1962
1963 sigmaclip([4,17,8,3]) the result is ([ 4. 17. 8. 3.],-14.0907220344,30.0907220344)
1964
1965 =========
1966 threshold
1967 =========
1968
1969 Clip array to a given value.
1970
1971 Similar to numpy.clip(), except that values less than threshmin or greater than threshmax are replaced by newval, instead of by threshmin and threshmax respectively.
1972
1973
1974 **The output are:**
1975
1976 out : ndarray
1977 The clipped input array, with values less than threshmin or greater than threshmax replaced with newval.
1978
1979 **example**:
1980
1981 stats.threshold([4,17,8,3],2,8,0)the result is array([4, 17, 8, 3])
1982
1983 ========
1984 trimboth
1985 ========
1986
1987 Slices off a proportion of items from both ends of an array.
1988
1989 Slices off the passed proportion of items from both ends of the passed array (i.e., with proportiontocut = 0.1, slices leftmost 10% and rightmost 10% of scores). You must pre-sort the array if you want ‘proper’ trimming. Slices off less if proportion results in a non-integer slice index (i.e., conservatively slices off proportiontocut).
1990
1991
1992 **The output are:**
1993
1994 out : ndarray
1995 Trimmed version of array a.
1996
1997 **example**:
1998
1999 stats.trimboth([4,17,8,3],0.1)the result is array([ 4, 17, 8, 3])
2000
2001 =====
2002 trim1
2003 =====
2004
2005 Slices off a proportion of items from ONE end of the passed array distribution.
2006
2007 If proportiontocut = 0.1, slices off ‘leftmost’ or ‘rightmost’ 10% of scores. Slices off LESS if proportion results in a non-integer slice index (i.e., conservatively slices off proportiontocut ).
2008
2009 **The output are:**
2010
2011 trim1 : ndarray
2012
2013 Trimmed version of array a
2014
2015 **example**:
2016
2017 stats.trim1([4,17,8,3],0.5,'left')the result is array([8, 3])
2018
2019 =========
2020 spearmanr
2021 =========
2022
2023 Calculates a Spearman rank-order correlation coefficient and the p-value to test for non-correlation.
2024
2025 The Spearman correlation is a nonparametric measure of the monotonicity of the relationship between two datasets. Unlike the Pearson correlation, the Spearman correlation does not assume that both datasets are normally distributed. Like other correlation coefficients, this one varies between -1 and +1 with 0 implying no correlation. Correlations of -1 or +1 imply an exact monotonic relationship. Positive correlations imply that as x increases, so does y. Negative correlations imply that as x increases, y decreases.
2026
2027 **The output are:**
2028
2029 rho : float or ndarray (2-D square)
2030
2031 Spearman correlation matrix or correlation coefficient (if only 2 variables are given as parameters. Correlation matrix is square with length equal to total number of variables (columns or rows) in a and b combined.
2032
2033 p-value : float
2034
2035 The two-sided p-value for a hypothesis test whose null hypothesis is that two sets of data are uncorrelated, has same dimension as rho.
2036
2037 **example**:
2038
2039 stats.spearmanr([4,17,8,3,30,45,5,3],[5,3,4,17,8,3,30,45])the result is (-0.722891566265, 0.0427539458876)
2040
2041 ========
2042 f oneway
2043 ========
2044
2045 Performs a 1-way ANOVA.
2046
2047 The one-way ANOVA tests the null hypothesis that two or more groups have the same population mean. The test is applied to samples from two or more groups, possibly with differing sizes.
2048
2049 **The output are:**
2050
2051 F-value : float
2052
2053 The computed F-value of the test.
2054
2055 p-value : float
2056
2057 The associated p-value from the F-distribution.
2058
2059 **example**:
2060
2061 stats.f_oneway([4,17,8,3], [30,45,5,3]) the result is (1.43569457222,0.276015080537)
2062
2063 =================
2064 Mann-Whitney rank
2065 =================
2066
2067 Compute the Wilcoxon rank-sum statistic for two samples.
2068
2069 The Wilcoxon rank-sum test tests the null hypothesis that two sets of measurements are drawn from the same distribution. The alternative hypothesis is that values in one sample are more likely to be larger than the values in the other sample.
2070
2071 This test should be used to compare two samples from continuous distributions. It does not handle ties between measurements in x and y. For tie-handling and an optional continuity correction use mannwhitneyu.
2072
2073 -----
2074
2075 Computes the Mann-Whitney rank test on samples x and y.
2076
2077 u : float
2078
2079 The Mann-Whitney statistics.
2080
2081 prob : float
2082
2083 One-sided p-value assuming a asymptotic normal distribution.
2084
2085 ===================
2086 Ansari-Bradley test
2087 ===================
2088
2089 Perform the Ansari-Bradley test for equal scale parameters
2090
2091 The Ansari-Bradley test is a non-parametric test for the equality of the scale parameter of the distributions from which two samples were drawn.
2092
2093 The p-value given is exact when the sample sizes are both less than 55 and there are no ties, otherwise a normal approximation for the p-value is used.
2094
2095 -----
2096
2097 Computes the Ansari-Bradley test for samples x and y.
2098
2099 **The output are:**
2100
2101 AB : float
2102
2103 The Ansari-Bradley test statistic
2104
2105 p-value : float
2106
2107 The p-value of the hypothesis test
2108
2109 **example**:
2110
2111 ansari([1,2,3,4],[15,5,20,8,10,12]) the result is (10.0, 0.53333333333333333)
2112
2113 ========
2114 bartlett
2115 ========
2116
2117 Perform Bartlett’s test for equal variances
2118
2119 Bartlett’s test tests the null hypothesis that all input samples are from populations with equal variances.
2120
2121 It has to have at least two samples.
2122
2123 **The output are:**
2124
2125 T : float
2126
2127 The test statistic.
2128
2129 p-value : float
2130
2131 The p-value of the test.
2132
2133
2134 **example**:
2135
2136 stats.bartlett([4,17,8,3], [30,45,5,3]) the result is (2.87507113948,0.0899609995242)
2137
2138 ======
2139 levene
2140 ======
2141
2142 Perform Levene test for equal variances.
2143
2144 The Levene test tests the null hypothesis that all input samples are from populations with equal variances.
2145
2146 It has to have at least two samples.
2147
2148 **The output are:**
2149
2150 W : float
2151
2152 The test statistic.
2153
2154 p-value : float
2155
2156 The p-value for the test.
2157
2158
2159 **example**:
2160
2161 stats.levene([4,17,8,3], [30,45,5,3], center='mean', proportiontocut=0.01) the result is (11.5803858521,0.014442549362)
2162
2163 =======
2164 fligner
2165 =======
2166
2167 Perform Fligner’s test for equal variances.
2168
2169 Fligner’s test tests the null hypothesis that all input samples are from populations with equal variances. Fligner’s test is non-parametric in contrast to Bartlett’s test bartlett and Levene’s test levene.
2170
2171 **The output are:**
2172
2173 Xsq : float
2174
2175 The test statistic.
2176
2177 p-value : float
2178
2179 The p-value for the hypothesis test.
2180
2181
2182 ==========
2183 linregress
2184 ==========
2185
2186 Calculate a regression line
2187
2188 This computes a least-squares regression for two sets of measurements.
2189
2190 -----
2191
2192 Computes the least-squares regression for samples x and y.
2193
2194 **The output are:**
2195
2196 slope : float
2197
2198 slope of the regression line
2199
2200 intercept : float
2201
2202 intercept of the regression line
2203
2204 r-value : float
2205
2206 correlation coefficient
2207
2208 p-value : float
2209
2210 two-sided p-value for a hypothesis test whose null hypothesis is that the slope is zero.
2211
2212 stderr : float
2213
2214 Standard error of the estimate
2215
2216 **example**:
2217
2218 linregress([4,417,8,3],[30,45,5,3]) the result is (0.0783053989099, 12.2930169177, 0.794515680443,0.205484319557,0.0423191764713)
2219
2220 ===========
2221 ttest 1samp
2222 ===========
2223
2224 Calculates the T-test for the mean of ONE group of scores.
2225
2226 This is a two-sided test for the null hypothesis that the expected value (mean) of a sample of independent observations a is equal to the given population mean, popmean.
2227
2228 **The output are:**
2229
2230 t : float or array
2231
2232 The calculated t-statistic.
2233
2234 prob : float or array
2235
2236 The two-tailed p-value.
2237
2238 **example**:
2239
2240 stats.ttest_1samp([4,17,8,3],[30,45,5,3])the result is (array([ -6.89975053, -11.60412589, 0.94087507, 1.56812512]), array([ 0.00623831, 0.00137449, 0.41617971, 0.21485306]))
2241
2242 =========
2243 ttest ind
2244 =========
2245
2246 Calculates the T-test for the means of TWO INDEPENDENT samples of scores.
2247
2248 This is a two-sided test for the null hypothesis that 2 independent samples have identical average (expected) values. This test assumes that the populations have identical variances.
2249
2250 The independent samples t-test is used when two separate sets of independent and identically distributed samples are obtained, one from each of the two populations
2251 being compared.
2252 -----
2253 Computes the T-test for the means of independent samples x and y.
2254
2255 **The output are:**
2256
2257 t : float or array
2258
2259 The calculated t-statistic.
2260
2261 prob : float or array
2262
2263 The two-tailed p-value.
2264
2265 **example**:
2266
2267 ttest_ind([4,417,8,3],[30,45,5,3]) the result is (0.842956644207,0.431566932748)
2268
2269 =========
2270 ttest rel
2271 =========
2272
2273 Calculates the T-test on TWO RELATED samples of scores, a and b.
2274
2275 This is a two-sided test for the null hypothesis that 2 related or repeated samples have identical average (expected) values.
2276
2277 related samples t-tests typically consist of a sample of matched pairs of similar units, or one group of units that has been tested twice (a "repeated measures" t-test)
2278
2279 -----
2280
2281 Computes the T-test for the means of related samples x and y.
2282
2283 **The output are:**
2284
2285 t : float or array
2286
2287 t-statistic
2288
2289 prob : float or array
2290
2291 two-tailed p-value
2292
2293 **example**:
2294
2295 ttest_rel([4,417,8,3],[30,45,5,3]) the result is (0.917072474241,0.426732624361)
2296
2297 =========
2298 chisquare
2299 =========
2300
2301 Calculates a one-way chi square test.
2302
2303 The chi square test tests the null hypothesis that the categorical data has the given frequencies.
2304
2305 **The output are:**
2306
2307 chisq : float or ndarray
2308
2309 The chi-squared test statistic. The value is a float if axis is None or f_obs and f_exp are 1-D.
2310
2311 p : float or ndarray
2312
2313 The p-value of the test. The value is a float if ddof and the return value chisq are scalars.
2314
2315 **example**:
2316
2317 stats.chisquare([4,17,8,3],[30,45,5,3],ddof=1)the result is (41.7555555556,8.5683326078e-10)
2318
2319 ================
2320 power divergence
2321 ================
2322
2323 Cressie-Read power divergence statistic and goodness of fit test.
2324
2325 This function tests the null hypothesis that the categorical data has the given frequencies, using the Cressie-Read power divergence statistic.
2326
2327 **The output are:**
2328
2329 stat : float or ndarray
2330
2331 The Cressie-Read power divergence test statistic. The value is a float if axis is None or if f_obs and f_exp are 1-D.
2332
2333 p : float or ndarray
2334
2335 The p-value of the test. The value is a float if ddof and the return value stat are scalars.
2336
2337 **example**:
2338
2339 stats.power_divergence([4,17,8,3],[30,45,5,3],1,lambda_=1) the result is (41.7555555556, 8.5683326078e-10)
2340
2341 ==========
2342 tiecorrect
2343 ==========
2344
2345 Tie correction factor for ties in the Mann-Whitney U and Kruskal-Wallis H tests.
2346
2347 **The output are:**
2348
2349 factor : float
2350
2351 Correction factor for U or H.
2352
2353 **example**:
2354
2355 stats.tiecorrect([4,17,8,3,30,45,5,3])the result is (0.988095238095)
2356
2357 ========
2358 rankdata
2359 ========
2360
2361 Assign ranks to data, dealing with ties appropriately.
2362
2363 Ranks begin at 1. The method argument controls how ranks are assigned to equal values. See [R308] for further discussion of ranking methods.
2364
2365 **The output are:**
2366
2367 ranks : ndarray
2368
2369 An array of length equal to the size of a, containing rank scores.
2370
2371 **example**:
2372
2373 stats.rankdata([4,17,8,3],'average') the result is ([ 2. 4. 3. 1.])
2374
2375 =======
2376 kruskal
2377 =======
2378
2379 Compute the Kruskal-Wallis H-test for independent samples
2380
2381 The Kruskal-Wallis H-test tests the null hypothesis that the population median of all of the groups are equal. It is a non-parametric version of ANOVA.
2382
2383 The number of samples has to be more than one
2384
2385 **The output are:**
2386
2387 H-statistic : float
2388
2389 The Kruskal-Wallis H statistic, corrected for ties
2390
2391 p-value : float
2392
2393 The p-value for the test using the assumption that H has a chi square distribution
2394
2395
2396 **example**:
2397
2398 stats.kruskal([4,17,8,3], [30,45,5,3]) the result is (0.527108433735,0.467825077285)
2399
2400 ==================
2401 friedmanchisquare
2402 ==================
2403
2404 Computes the Friedman test for repeated measurements
2405
2406 The Friedman test tests the null hypothesis that repeated measurements of the same individuals have the same distribution. It is often used to test for consistency among measurements obtained in different ways.
2407
2408 The number of samples has to be more than two.
2409
2410 **The output are:**
2411
2412 friedman chi-square statistic : float
2413
2414 the test statistic, correcting for ties
2415
2416 p-value : float
2417
2418 the associated p-value assuming that the test statistic has a chi squared distribution
2419
2420
2421 **example**:
2422
2423 stats.friedmanchisquare([4,17,8,3],[8,3,30,45],[30,45,5,3])the result is (0.933333333333,0.627089085273)
2424
2425 =====
2426 mood
2427 =====
2428
2429 Perform Mood’s test for equal scale parameters.
2430
2431 Mood’s two-sample test for scale parameters is a non-parametric test for the null hypothesis that two samples are drawn from the same distribution with the same scale parameter.
2432
2433 -----
2434
2435 Computes the Mood’s test for equal scale samples x and y.
2436
2437 **The output are:**
2438
2439 z : scalar or ndarray
2440
2441 The z-score for the hypothesis test. For 1-D inputs a scalar is returned;
2442
2443 p-value : scalar ndarray
2444
2445 The p-value for the hypothesis test.
2446
2447 **example**:
2448
2449 mood([4,417,8,3],[30,45,5,3]) the result is (0.396928310068,0.691420327045)
2450
2451 ===============
2452 combine_pvalues
2453 ===============
2454
2455 Methods for combining the p-values of independent tests bearing upon the same hypothesis.
2456
2457
2458 **The output are:**
2459
2460 statistic: float
2461
2462 The statistic calculated by the specified method: - “fisher”: The chi-squared statistic - “stouffer”: The Z-score
2463
2464 pval: float
2465
2466 The combined p-value.
2467
2468 **example**:
2469
2470 stats.combine_pvalues([4,17,8,3],method='fisher',weights=[5,6,7,8]) the result is (-14.795123071,1.0)
2471
2472 ===========
2473 median test
2474 ===========
2475
2476 Mood’s median test.
2477
2478 Test that two or more samples come from populations with the same median.
2479
2480 **The output are:**
2481
2482 stat : float
2483
2484 The test statistic. The statistic that is returned is determined by lambda. The default is Pearson’s chi-squared statistic.
2485
2486 p : float
2487
2488 The p-value of the test.
2489
2490 m : float
2491
2492 The grand median.
2493
2494 table : ndarray
2495
2496 The contingency table.
2497
2498
2499 **example**:
2500
2501 stats.median_test(*a, ties='below', correction=True, lambda_=1) the result is ((0.0, 1.0, 6.5, array([[2, 2],[2, 2]])))
2502
2503 ========
2504 shapiro
2505 ========
2506
2507 Perform the Shapiro-Wilk test for normality.
2508
2509 The Shapiro-Wilk test tests the null hypothesis that the data was drawn from a normal distribution.
2510
2511 -----
2512
2513 Computes the Shapiro-Wilk test for samples x and y.
2514
2515 If x has length n, then y must have length n/2.
2516
2517 **The output are:**
2518
2519 W : float
2520
2521 The test statistic.
2522
2523 p-value : float
2524
2525 The p-value for the hypothesis test.
2526
2527 a : array_like, optional
2528
2529 If reta is True, then these are the internally computed “a” values that may be passed into this function on future calls.
2530
2531
2532 **example**:
2533
2534 shapiro([4,417,8,3],[45,5]) the result is (0.66630089283, 0.00436889193952, [45,5])
2535
2536 ========
2537 anderson
2538 ========
2539
2540 Anderson-Darling test for data coming from a particular distribution
2541
2542 The Anderson-Darling test is a modification of the Kolmogorov- Smirnov test kstest for the null hypothesis that a sample is drawn from a population that follows a particular distribution. For the Anderson-Darling test, the critical values depend on which distribution is being tested against. This function works for normal, exponential, logistic, or Gumbel (Extreme Value Type I) distributions.
2543
2544 -----
2545
2546 Computes the Anderson-Darling test for samples x which comes from a specific distribution..
2547
2548 **The output are:**
2549
2550
2551 A2 : float
2552
2553 The Anderson-Darling test statistic
2554
2555 critical : list
2556
2557 The critical values for this distribution
2558
2559 sig : list
2560
2561 The significance levels for the corresponding critical values in percents. The function returns critical values for a differing set of significance levels depending on the distribution that is being tested against.
2562
2563 **example**:
2564
2565 anderson([4,417,8,3],'norm') the result is (0.806976419634,[ 1.317 1.499 1.799 2.098 2.496] ,[ 15. 10. 5. 2.5 1. ])
2566
2567 ==========
2568 binom_test
2569 ==========
2570
2571 Perform a test that the probability of success is p.
2572
2573 This is an exact, two-sided test of the null hypothesis that the probability of success in a Bernoulli experiment is p.
2574
2575 The binomial test is an exact test of the statistical significance of deviations from a theoretically expected distribution of observations into two categories.
2576
2577 -----
2578
2579 Computes the test for the probability of success is p .
2580
2581 **The output are:**
2582
2583 p-value : float
2584
2585 The p-value of the hypothesis test
2586
2587 **example**:
2588
2589 binom_test([417,8],1,0.5) the result is (5.81382734132e-112)
2590
2591 ========
2592 pearsonr
2593 ========
2594
2595 Calculates a Pearson correlation coefficient and the p-value for testing non-correlation.
2596
2597 The Pearson correlation coefficient measures the linear relationship between two datasets.The value of the correlation (i.e., correlation coefficient) does not depend on the specific measurement units used.
2598
2599 **The output are:**
2600
2601 Pearson’s correlation coefficient: float
2602
2603 2-tailed p-value: float
2604
2605
2606 **example**:
2607
2608 pearsonr([4,17,8,3],[30,45,5,3]) the result is (0.695092958988,0.304907041012)
2609
2610 ========
2611 wilcoxon
2612 ========
2613
2614 Calculate the Wilcoxon signed-rank test.
2615
2616 The Wilcoxon signed-rank test tests the null hypothesis that two related paired samples come from the same distribution. In particular, it tests whether the distribution of the differences x - y is symmetric about zero. It is a non-parametric version of the paired T-test.
2617
2618 **The output are:**
2619
2620 T : float
2621
2622 The sum of the ranks of the differences above or below zero, whichever is smaller.
2623
2624 p-value : float
2625
2626 The two-sided p-value for the test.
2627
2628
2629 **example**:
2630
2631 stats.wilcoxon([3,6,23,70,20,55,4,19,3,6],
2632 [23,70,20,55,4,19,3,6,23,70],zero_method='pratt',correction=True) the result is (23.0, 0.68309139830960874)
2633
2634 ==============
2635 pointbiserialr
2636 ==============
2637
2638 Calculates a Pearson correlation coefficient and the p-value for testing non-correlation.
2639
2640 The Pearson correlation coefficient measures the linear relationship between two datasets.The value of the correlation (i.e., correlation coefficient) does not depend on the specific measurement units used.
2641 **The output are:**
2642
2643 r : float
2644
2645 R value
2646
2647 p-value : float
2648
2649 2-tailed p-value
2650
2651
2652 **example**:
2653
2654 pointbiserialr([0,0,0,1,1,1,1],[1,0,1,2,3,4,5]) the result is (0.84162541153017323, 0.017570710081214368)
2655
2656 ========
2657 ks_2samp
2658 ========
2659
2660 Computes the Kolmogorov-Smirnov statistic on 2 samples.
2661
2662 This is a two-sided test for the null hypothesis that 2 independent samples are drawn from the same continuous distribution.
2663
2664 If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same.
2665
2666 **The output are:**
2667
2668 D : float
2669
2670 KS statistic
2671
2672 p-value : float
2673
2674 two-tailed p-value
2675
2676
2677 **example**:
2678
2679 ks_2samp([4,17,8,3],[30,45,5,3]) the result is (0.5,0.534415719217)
2680
2681 ==========
2682 kendalltau
2683 ==========
2684
2685 Calculates Kendall’s tau, a correlation measure for sample x and sample y.
2686
2687 sample x and sample y should be in the same size.
2688
2689 Kendall’s tau is a measure of the correspondence between two rankings. Values close to 1 indicate strong agreement, values close to -1 indicate strong disagreement. This is the tau-b version of Kendall’s tau which accounts for ties.
2690
2691
2692 **The output are:**
2693
2694 Kendall’s tau : float
2695
2696 The tau statistic.
2697
2698 p-value : float
2699
2700 The two-sided p-value for a hypothesis test whose null hypothesis is an absence of association, tau = 0.
2701
2702
2703 **example**:
2704
2705 kendalltau([4,17,8,3],[30,45,5,3]) the result is (0.666666666667,0.174231399708)
2706
2707 ================
2708 chi2_contingency
2709 ================
2710
2711 Chi-square test of independence of variables in a contingency table.
2712
2713 This function computes the chi-square statistic and p-value for the hypothesis test of independence of the observed frequencies in the contingency table observed.
2714
2715 **The output are:**
2716
2717 chi2 : float
2718
2719 The test statistic.
2720
2721 p : float
2722
2723 The p-value of the test
2724
2725 dof : int
2726
2727 Degrees of freedom
2728
2729 expected : ndarray, same shape as observed
2730
2731 The expected frequencies, based on the marginal sums of the table.
2732
2733 **example**:
2734
2735 stats.chi2_contingency([4,17,8,3],1) the result is (0.0, 1.0, 0, array([ 4., 17., 8., 3.]))
2736
2737 ======
2738 boxcox
2739 ======
2740
2741 Return a positive dataset transformed by a Box-Cox power transformation.
2742
2743 **The output are:**
2744
2745 boxcox : ndarray
2746
2747 Box-Cox power transformed array.
2748
2749 maxlog : float, optional
2750
2751 If the lmbda parameter is None, the second returned argument is the lambda that maximizes the log-likelihood function.
2752
2753 (min_ci, max_ci) : tuple of float, optional
2754
2755 If lmbda parameter is None and alpha is not None, this returned tuple of floats represents the minimum and maximum confidence limits given alpha.
2756
2757
2758 **example**:
2759
2760 stats.boxcox([4,17,8,3],0.9) the result is ([ 1.03301717 1.60587825 1.35353026 0.8679017 ],-0.447422166194,(-0.5699221654511225, -0.3259515659400082))
2761
2762 ==============
2763 boxcox normmax
2764 ==============
2765
2766 Compute optimal Box-Cox transform parameter for input data
2767
2768 **The output are:**
2769
2770 maxlog : float or ndarray
2771
2772 The optimal transform parameter found. An array instead of a scalar for method='all'.
2773
2774
2775 **example**:
2776
2777 stats.boxcox_normmax([4,17,8,3],(-2,2),'pearsonr') the result is (-0.702386238971)
2778
2779 ==========
2780 boxcox llf
2781 ==========
2782
2783 The boxcox log-likelihood function
2784
2785 **The output are:**
2786
2787 llf : float or ndarray
2788
2789 Box-Cox log-likelihood of data given lmb. A float for 1-D data, an array otherwise.
2790
2791 **example**:
2792
2793 stats.boxcox_llf(1,[4,17,8,3]) the result is (-6.83545336723)
2794
2795 =======
2796 entropy
2797 =======
2798
2799 Calculate the entropy of a distribution for given probability values.
2800
2801 If only probabilities pk are given, the entropy is calculated as S = -sum(pk * log(pk), axis=0).
2802
2803 If qk is not None, then compute the Kullback-Leibler divergence S = sum(pk * log(pk / qk), axis=0).
2804
2805 This routine will normalize pk and qk if they don’t sum to 1.
2806
2807 **The output are:**
2808
2809 S : float
2810
2811 The calculated entropy.
2812
2813
2814 **example**:
2815
2816 stats.entropy([4,17,8,3],[30,45,5,3],1.6) the result is (0.641692653659)
2817
2818 ======
2819 kstest
2820 ======
2821
2822 Perform the Kolmogorov-Smirnov test for goodness of fit.
2823
2824 **The output are:**
2825
2826 D : float
2827
2828 KS test statistic, either D, D+ or D-.
2829
2830 p-value : float
2831
2832 One-tailed or two-tailed p-value.
2833
2834 **example**:
2835
2836 stats.kstest([4,17,8,3],'norm',N=20,alternative='two-sided',mode='approx') the result is (0.998650101968,6.6409100441e-12)
2837
2838 ===========
2839 theilslopes
2840 ===========
2841
2842 Computes the Theil-Sen estimator for a set of points (x, y).
2843
2844 theilslopes implements a method for robust linear regression. It computes the slope as the median of all slopes between paired values.
2845
2846 **The output are:**
2847
2848 medslope : float
2849
2850 Theil slope.
2851
2852 medintercept : float
2853
2854 Intercept of the Theil line, as median(y) - medslope*median(x).
2855
2856 lo_slope : float
2857
2858 Lower bound of the confidence interval on medslope.
2859
2860 up_slope : float
2861
2862 Upper bound of the confidence interval on medslope.
2863
2864 **example**:
2865
2866 stats.theilslopes([4,17,8,3],[30,45,5,3],0.95) the result is (0.279166666667,1.11458333333,-0.16,2.5)
2867
2868 </help>
2869 </tool>