diff eden_vectorizer.xml @ 0:95482139106c draft

planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
author bgruening
date Mon, 02 May 2016 16:17:33 -0400
parents
children 3df106e75b33
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_vectorizer.xml	Mon May 02 16:17:33 2016 -0400
@@ -0,0 +1,92 @@
+<tool id="eden_vectorizer" name="Transform" version="@VERSION@">
+    <description>real vector graphs to sparse vectors</description>
+    <macros>
+        <import>main_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <command>
+        <![CDATA[
+    python "$vectorizer_script" '$inputs'
+        ]]>
+    </command>
+    <configfiles>
+        <inputs name="inputs"/>
+        <configfile name="vectorizer_script">
+            <![CDATA[
+import sys
+import json
+import networkx
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.io import mmwrite
+
+from eden.graph import Vectorizer
+from eden.converter.graph.node_link_data import node_link_data_to_eden
+
+input_json_path = sys.argv[1]
+params = json.load(open(input_json_path, "r"))
+
+options = { k : v for k,v in params["options"].iteritems() if v}
+
+graph_list = node_link_data_to_eden("$infile")
+
+vectorizer = Vectorizer(**options)
+
+sparse_representation = vectorizer.transform(graph_list)
+mmwrite(open("$outfile" , 'w+'), sparse_representation)
+            ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="infile" type="data" format="json" label="Graph data" help="JSON representation of graphs."/>
+        <section name="options" title="Advanced Options" expanded="False">
+            <param argument="r" type="integer" optional="true" value="3" label="Maximum radius size" help="The maximal radius size in pairwise neighborhood. ( unit : node )"/>
+            <param argument="d" type="integer" optional="true" value="3" label="Maximum distance size" help="The maximal distance size in pairwise neighborhood. ( unit : node )"/>
+            <param argument="n" type="integer" optional="true" value="1" label="Maximum number of clusters" help="The maximal number of clusters used to discretize real label vectors."/>
+            <param argument="min_r" type="integer" optional="true" value="0" label="Minimal radius size" help="The minimal radius size in pairwise neighborhood."/>
+            <param argument="min_d" type="integer" optional="true" value="0" label="Minimal distance size" help="The minimal distance size in pairwise neighborhood."/>
+            <param argument="min_n" type="integer" optional="true" value="2" label="Minimal number of clusters" help="The minimal number of clusters used to discretize real label vectors ( default : 2 )"/>
+            <param argument="label_size" type="integer" optional="true" value="1" label="Label size" help="The number of discretization steps used in the conversion from real valued labels to discrete labels."/>
+            <param argument="nbits" type="integer" optional="true" value="20" label="Feature space size" help="The number of bits that defines the feature space size: |feature space|=2^nbits"/>
+            <param argument="normalization" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalization" help="Flag to set the resulting feature vector to have unit euclidean norm."/> 
+            <param argument="inner_normalization" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Combine radius and distance size to normalize" help="Flag to set the feature vector for a specific combination of the radius and distance size to have unit euclidean norm (default True).
+            When used together with the 'normalization' flag it will be applied first and then the resulting feature vector will be normalized."/>
+            <param argument="triangular_decomposition" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Process dense graphs" help=""/>
+        </section>
+    </inputs>
+    <outputs>
+        <data format="txt" name="outfile"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="converter_result01.json" ftype="json"/>
+            <output name="outfile" file="vectorizer_result01.mtx"/>
+        </test>
+        <test>
+            <param name="infile" value="converter_result02.json" ftype="json"/>
+            <output name="outfile" file="vectorizer_result02.mtx"/>
+        </test>
+        <test>
+            <param name="infile" value="converter_result02.json" ftype="json"/>
+            <param name="complexity" value="5"/>
+            <param name="r" value="4"/>
+            <output name="outfile" file="vectorizer_result03.mtx"/>
+        </test>
+        <test>
+            <param name="infile" value="converter_result02.json" ftype="json"/>
+            <param name="complexity" value="5"/>
+            <param name="r" value="4"/>
+            <output name="outfile" file="vectorizer_result04.mtx"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+        **What it does**
+        This tool transforms real vector, labeled, weighted, nested graphs to sparse vectors.
+        For more information check http://dx.doi.org/10.5281/zenodo.15094
+        ]]>
+    </help>
+    <expand macro="eden_citation"/>
+</tool>