<tool id="stats_perf_tool" name="Performance metrics" version="5.11.0">
    <description>suitable for boolean classification problems (perf)</description>
    <requirements>
        <requirement type="package" version="5.11">perf</requirement>
    </requirements>
    <command>
        perf
        -t $threshold
        #echo ' '.join(str($performance_measures).split(','))#
        $plot
        -file "${infile}"
        2>/dev/null
        > perf.out;

        #if str($plot):
            csplit --prefix 'perf' -s perf.out '/^$/';
            cat perf00 | tr ' ' \\t > perf_plotting_data.out;
            cat perf01 | awk '{printf("%s\t%s\n",$1,$2)}' > perf_results.out;
        #else:
            cat perf.out | awk '{printf("%s\t%s\n",$1,$2)}' > perf_results.out;
        #end if
    </command>
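
    <!--
        Illustrative note, not part of the tool logic: with the defaults above
        (threshold 0.5; Accuracy, ROC area and Mean Average Precision selected;
        no plot), the Cheetah template renders to roughly

            perf -t 0.5 -ACC -ROC -APR -file /path/to/input.tabular 2>/dev/null > perf.out;
            cat perf.out | awk '{printf("%s\t%s\n",$1,$2)}' > perf_results.out;

        where /path/to/input.tabular stands in for the Galaxy dataset path.
        The #echo directive joins the comma-separated multi-select value
        "-ACC,-ROC,-APR" into space-separated flags. When a plot is requested,
        the csplit branch splits perf.out at the first blank line: perf00 holds
        the plot points and perf01 the summary values.
    -->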
    <inputs>
        <param name="infile" format="tabular" type="data" label="Input file" help="First column: targets; second column: predictions (-file)"/>

        <param name="performance_measures" multiple="True" type="select" display="checkboxes" label="Performance measures to compute">
            <option value="-ACC" selected="True">Accuracy</option>
            <option value="-RMS">Root Mean Squared Error</option>
            <option value="-CXE">Mean Cross-Entropy</option>
            <option value="-ROC" selected="True">ROC area</option>
            <option value="-R50">ROC area up to 50 negative examples</option>
            <option value="-SEN">Sensitivity</option>
            <option value="-SPC">Specificity</option>
            <option value="-NPV">Negative Predictive Value</option>
            <option value="-PPV">Positive Predictive Value</option>
            <option value="-PRE">Precision</option>
            <option value="-REC">Recall</option>
            <option value="-PRF">F1 score</option>
            <option value="-PRB">Precision/Recall Break Even Point</option>
            <option value="-APR" selected="True">Mean Average Precision</option>
            <!--option value="-LFT">Lift (at threshold)</option>
            <option value="-TOP1">Top 1: is the top ranked case positive</option>
            <option value="-TOP10">Top 10: is there a positive in the top 10 ranked cases</option>
            <option value="-NTOP">How many positives in the top N ranked cases</option>
            <option value="-RKL">Rank of *last* (poorest ranked) positive case</option>
            <option value="-NRM">Norm error using metric</option>
            <option value="-CST">Total cost using these cost values, plus min-cost results</option-->
            <!--option value="-SAR">typically wACC = wROC = wRMS = 1.0</option-->
            <!--option value="-CAL">CA1/CA2 scores</option-->
            <!--option value="-SLQ">Slac Q-score</option-->
        </param>

        <param name="plot" type="select" label="Plotting type">
            <option value="" selected="True">No plot</option>
            <option value="-plot roc">ROC plot</option>
            <option value="-plot pr">Precision/Recall plot</option>
            <option value="-plot lift">Lift versus threshold plot</option>
            <option value="-plot cost">Cost versus threshold plot</option>
            <option value="-plot acc">Accuracy versus threshold plot</option>
        </param>

        <param name="threshold" size="4" type="float" min="0" value="0.5" label="Threshold" help="Classification threshold passed to perf (-t)"/>

    </inputs>

    <outputs>
        <data format="tabular" name="outfile" from_work_dir="perf_results.out" label="Performance measures from ${on_string}"/>
        <data format="tabular" name="outfile_plotting" from_work_dir="perf_plotting_data.out" label="Plotting data from ${on_string}">
            <filter>plot != ""</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="infile" ftype="tabular" value="testperf.dat"/>
            <param name="performance_measures" value="-ACC,-ROC,-APR"/>
            <!-- a plot type must be selected, otherwise outfile_plotting is
                 filtered out; "-plot roc" is assumed here -->
            <param name="plot" value="-plot roc"/>
            <output name="outfile" ftype="tabular" file="testperf.results"/>
            <output name="outfile_plotting" ftype="tabular" file="testperf.results.plot"/>
        </test>
    </tests>
    <help>

**What it does**

Perf calculates a variety of performance metrics for boolean classification problems. The available metrics include accuracy, root mean squared error, mean cross-entropy, precision, recall, the precision/recall break-even point, F1 score, area under the ROC curve, lift, weighted cost, top 1, top 10, rank of the lowest-ranked positive case, Q-score, and several measures of probability calibration.
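
**Example**

The input is a two-column tabular file with the true 0/1 targets in the first column and the predicted scores in the second, for instance::

    1  0.94
    1  0.72
    0  0.41
    0  0.08

With Accuracy, ROC area and Mean Average Precision selected, the tool writes a two-column table pairing each metric with its value; the numbers below are purely illustrative::

    ACC  0.85000
    ROC  0.90210
    APR  0.87330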

For more information please refer to:

http://osmot.cs.cornell.edu/kddcup/software.html

    </help>
</tool>