view EDeN_cross_validation.xml @ 11:bf63bd4cf462 draft default tip

Uploaded
author bgruening
date Thu, 15 May 2014 17:25:44 -0400
parents 5be8af51780d
children
line wrap: on
line source

<tool id="bg_eden_cross_validation" name="EDeN Crossvalidation" version="0.1">
    <description></description>
    <macros>
        <import>eden_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command><![CDATA[
        ## Run EDeN in cross-validation mode on the sparse feature vectors.
        ## Dataset paths are single-quoted so the shell treats each as one argument.
        EDeN --action CROSS_VALIDATION

        --input_data_file_name '$sparse_vector_infile'
        --file_type "SPARSE_VECTOR"

        ## target_file_name is a file with 1 or -1 in each row, indicating the class
        --target_file_name '$target_infile'
        --binary_file_type

        --num_cross_validation_folds ${num_cross_validation_folds}

        ## Only post-process when EDeN succeeded; chaining with a plain semicolon would
        ## still run tr and leave an empty or stale output after a failed run.
        &&

        ## Convert the space-separated cv_predictions file into the tabular output.
        tr ' ' '\t' < cv_predictions > '$outfile'
    ]]></command>
    <inputs>
        <!-- Sparse feature vectors that the command section hands to EDeN as its input data file. -->
        <param name="sparse_vector_infile"
               type="data"
               format="eden_sparse_vector"
               label="Input File"
               help="(--input_data_file_name/-f)" />

        <!-- Class labels, one per row, that the command section hands to EDeN as its target file. -->
        <param name="target_infile"
               type="data"
               format="txt"
               label="Target file"
               help="indicates with -1 and 1 the class" />

        <!-- Fold count forwarded to EDeN; the validator rejects values below 1. -->
        <param name="num_cross_validation_folds"
               type="integer"
               value="10"
               label="Number of cross validations"
               help="--num_cross_validation_folds/-c">
            <validator type="in_range" min="1" />
        </param>
    </inputs>
    <outputs>
        <!-- Tab-separated predictions that the command section writes from cv_predictions. -->
        <data name="outfile"
              format="tabular"
              label="Crossvalidation of ${on_string}" />
    </outputs>
    <tests>
        <!-- NOTE(review): placeholder only; this test declares no inputs and no expected outputs. -->
        <test />
    </tests>
    <help>

.. class:: infomark

**What it does**

The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.
When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
If the target information is imbalanced, a minority class resampling technique is used to rebalance the training set.

@references@

    </help>
</tool>