Mercurial > repos > bgruening > eden_vectorizer
diff eden_vectorizer.xml @ 0:95482139106c draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
| author | bgruening |
|---|---|
| date | Mon, 02 May 2016 16:17:33 -0400 |
| parents | |
| children | 3df106e75b33 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_vectorizer.xml Mon May 02 16:17:33 2016 -0400
@@ -0,0 +1,105 @@
+<tool id="eden_vectorizer" name="Transform" version="@VERSION@">
+    <description>real vector graphs to sparse vectors</description>
+    <macros>
+        <import>main_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <!-- Runs the generated script below; its only argument is the path of the JSON dump of the tool parameters. -->
+    <command>
+        <![CDATA[
+        python "$vectorizer_script" '$inputs'
+        ]]>
+    </command>
+    <configfiles>
+        <inputs name="inputs"/>
+        <configfile name="vectorizer_script">
+            <![CDATA[
+## Load the graphs from the input dataset, vectorize them with EDeN and
+## write the resulting sparse matrix to the output dataset via scipy's mmwrite.
+import sys
+import json
+import networkx
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.io import mmwrite
+
+from eden.graph import Vectorizer
+from eden.converter.graph.node_link_data import node_link_data_to_eden
+
+input_json_path = sys.argv[1]
+with open(input_json_path, "r") as param_handle:
+    params = json.load(param_handle)
+
+## Drop only parameters that were left unset (None or empty string). Values such as
+## 0 or False are legitimate choices and have to reach the Vectorizer; filtering on
+## truthiness made it impossible to switch a normalization flag off.
+options = {k: v for k, v in params["options"].items() if v is not None and v != ""}
+
+graph_list = node_link_data_to_eden("$infile")
+
+vectorizer = Vectorizer(**options)
+
+sparse_representation = vectorizer.transform(graph_list)
+## Write through an explicitly opened binary handle so it is flushed and closed.
+with open("$outfile", "wb") as out_handle:
+    mmwrite(out_handle, sparse_representation)
+            ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="infile" type="data" format="json" label="Graph data" help="JSON representation of graphs."/>
+        <section name="options" title="Advanced Options" expanded="False">
+            <param argument="r" type="integer" optional="true" value="3" label="Maximum radius size" help="The maximal radius size in pairwise neighborhood. ( unit : node )"/>
+            <param argument="d" type="integer" optional="true" value="3" label="Maximum distance size" help="The maximal distance size in pairwise neighborhood. ( unit : node )"/>
+            <param argument="n" type="integer" optional="true" value="1" label="Maximum number of clusters" help="The maximal number of clusters used to discretize real label vectors."/>
+            <param argument="min_r" type="integer" optional="true" value="0" label="Minimal radius size" help="The minimal radius size in pairwise neighborhood."/>
+            <param argument="min_d" type="integer" optional="true" value="0" label="Minimal distance size" help="The minimal distance size in pairwise neighborhood."/>
+            <param argument="min_n" type="integer" optional="true" value="2" label="Minimal number of clusters" help="The minimal number of clusters used to discretize real label vectors ( default : 2 )"/>
+            <param argument="label_size" type="integer" optional="true" value="1" label="Label size" help="The number of discretization steps used in the conversion from real valued labels to discrete labels."/>
+            <param argument="nbits" type="integer" optional="true" value="20" label="Feature space size" help="The number of bits that defines the feature space size: |feature space|=2^nbits"/>
+            <!-- NOTE(review): falsevalue "boolflase" looks like a typo for "boolfalse"; it is left
+                 unchanged here in case other tools or macros match on the exact string. -->
+            <param argument="normalization" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalization" help="Flag to set the resulting feature vector to have unit euclidean norm."/>
+            <param argument="inner_normalization" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Combine radius and distance size to normalize" help="Flag to set the feature vector for a specific combination of the radius and distance size to have unit euclidean norm (default True). When used together with the 'normalization' flag it will be applied first and then the resulting feature vector will be normalized."/>
+            <param argument="triangular_decomposition" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Process dense graphs" help=""/>
+        </section>
+    </inputs>
+    <outputs>
+        <data format="txt" name="outfile"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="converter_result01.json" ftype="json"/>
+            <output name="outfile" file="vectorizer_result01.mtx"/>
+        </test>
+        <test>
+            <param name="infile" value="converter_result02.json" ftype="json"/>
+            <output name="outfile" file="vectorizer_result02.mtx"/>
+        </test>
+        <!-- NOTE(review): the two tests below use identical inputs but expect different output
+             files; one of them presumably was meant to set another parameter. A "complexity"
+             test parameter was dropped from both because the tool defines no such input. -->
+        <test>
+            <param name="infile" value="converter_result02.json" ftype="json"/>
+            <param name="r" value="4"/>
+            <output name="outfile" file="vectorizer_result03.mtx"/>
+        </test>
+        <test>
+            <param name="infile" value="converter_result02.json" ftype="json"/>
+            <param name="r" value="4"/>
+            <output name="outfile" file="vectorizer_result04.mtx"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+
+This tool transforms real vector, labeled, weighted, nested graphs to sparse vectors.
+
+For more information check http://dx.doi.org/10.5281/zenodo.15094
+        ]]>
+    </help>
+    <expand macro="eden_citation"/>
+</tool>
