diff perf.xml @ 0:e390b5b6b89c draft

Uploaded
author bgruening
date Thu, 15 May 2014 11:34:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/perf.xml	Thu May 15 11:34:26 2014 -0400
@@ -0,0 +1,92 @@
+<tool id="stats_perf_tool" name="Performance metrics" version="5.11.0">
+    <description>suitable for boolean classification problems (perf)</description>
+    <requirements>
+        <requirement type="package" version="5.11">perf</requirement>
+    </requirements>
+    <command>
+        perf 
+            -t $threshold
+            #echo ' '.join(str($performance_measures).split(','))#
+            $plot
+            -file "${infile}"
+            2>/dev/null
+            > perf.out;
+
+        #if str($plot):
+            csplit --prefix 'perf' -s perf.out '/^$/';
+            cat perf00 | tr ' ' \\t > perf_plotting_data.out;
+            cat perf01 | awk '{printf("%s\t%s\n",$1,$2)}' > perf_results.out;
+        #else:
+            cat perf.out  | awk '{printf("%s\t%s\n",$1,$2)}' > perf_results.out;
+        #end if
+
+    </command>
+    <inputs>
+        <param name="infile" format="tabular" type="data" label="File to select" help="1st col targets, 2nd col predictions (-infile)"/>
+
+        <param name="performance_measures" multiple="True" type="select" display="checkboxes" label="Select a pre-defined filtering set">
+            <option value="-ACC" selected="True">Accuracy</option>
+            <option value="-RMS">Root Mean Squared Error</option>
+            <option value="-CXE">Mean Cross-Entropy</option>
+            <option value="-ROC" selected="True">ROC area</option>
+            <option value="-R50">ROC area up to 50 negative examples</option>
+            <option value="-SEN">Sensitivity</option>
+            <option value="-SPC">Specificity</option>
+            <option value="-NPV">Negative Predictive Value</option>
+            <option value="-PPV">Positive Predictive Value</option>
+            <option value="-PRE">Precision</option>
+            <option value="-REC">Recall</option>
+            <option value="-PRF">F1 score</option>
+            <option value="-PRB">Precision/Recall Break Even Point</option>
+            <option value="-APR" selected="True">Mean Average Precision</option>
+            <!--option value="-LFT">Lift (at threshold)</option>
+            <option value="-TOP1">Top 1: is the top ranked case positive</option>
+            <option value="-TOP10">Top 10: is there a positive in the top 10 ranked cases</option>
+            <option value="-NTOP"> How many positives in the top N ranked cases</option>
+            <option value="-RKL">Rank of *last* (poorest ranked) positive case</option>
+            <option value="-NRM">Norm error using  metric</option>
+            <option value="-CST">Total cost using these cost values, plus min-cost results</option-->
+            <!--option value="-SAR">typically wACC = wROC = wRMS = 1.0</option-->
+            <!--option value="-CAL">CA1/CA2 scores</option-->
+            <!--option value="-SLQ">Slac Q-score</option-->
+        </param>
+
+        <param name="plot" type="select" label="Plotting type">
+            <option value="" selected="True">No plot</option>
+            <option value="-plot roc">ROC plot</option>
+            <option value="-plor pr">Precision/Recall plot</option>
+            <option value="-plot lift">Lift versus threshold plot</option>
+            <option value="-plor cost">Cost versus threshold plot</option>
+            <option value="-plor acc">Accuracy versus threshold plot</option>
+        </param>
+
+        <param name="threshold" size="4" type="float" min="0" value="0.5" label="Threshold"/>
+
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="outfile" from_work_dir="perf_results.out" label="Performance measures from ${on_string}" />
+        <data format="tabular" name="outfile_plotting" from_work_dir="perf_plotting_data.out" label="Performance values from ${on_string}">
+            <filter>plot is not ''</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="tabular" value="testperf.dat"/>
+            <param name="performance_measures" value="-ACC,-ROC,-APR" />
+            <output name="outfile" ftype="tabular" file="testperf.results" />
+            <output name="outfile_plotting" ftype="tabular" file="testperf.results.plot" />
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Perf calculates a variety of performance metrics suitable for boolean classification problems. Metrics include: accuracy, root-mean-squared-error, cross-entropy, precision, recall, precision/recall break-even point and F-score, area under the ROC curve, lift, weighted cost, top 1, top 10, rank of lowest positive case, q-score, several measures of probability calibration, etc. 
+
+For more information please refer to:
+
+http://osmot.cs.cornell.edu/kddcup/software.html
+
+    </help>
+</tool>