annotate eden_vectorizer.xml @ 5:4338e8aac25b draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a9f28163f0d2e808e49c43a6df5a040706e79991
author bgruening
date Thu, 23 Jun 2016 15:27:02 -0400
parents 3df106e75b33
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
1 <tool id="eden_vectorizer" name="Transform" version="@VERSION@">
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
2 <description>real vector graphs to sparse vectors</description>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
3 <macros>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
4 <import>main_macros.xml</import>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
5 </macros>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
6 <expand macro="python_requirements"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
7 <expand macro="macro_stdio"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
8 <version_command>echo "@VERSION@"</version_command>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
9 <command>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
10 <![CDATA[
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
11 python "$vectorizer_script" '$inputs'
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
12 ]]>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
13 </command>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
14 <configfiles>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
15 <inputs name="inputs"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
16 <configfile name="vectorizer_script">
4
3df106e75b33 planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 7ce0f8d46dc6d468065b6c2db79864b6d542a898
bgruening
parents: 0
diff changeset
17 <![CDATA[
0
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
18 import sys
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
19 import json
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
20 import networkx
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
21 import numpy as np
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
22 from scipy.sparse import csr_matrix
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
23 from scipy.io import mmwrite
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
24
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
25 from eden.graph import Vectorizer
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
26 from eden.converter.graph.node_link_data import node_link_data_to_eden
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
27
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
28 input_json_path = sys.argv[1]
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
29 params = json.load(open(input_json_path, "r"))
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
30
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
31 options = { k : v for k,v in params["options"].iteritems() if v}
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
32
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
33 graph_list = node_link_data_to_eden("$infile")
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
34
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
35 vectorizer = Vectorizer(**options)
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
36
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
37 sparse_representation = vectorizer.transform(graph_list)
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
38 mmwrite(open("$outfile" , 'w+'), sparse_representation)
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
39 ]]>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
40 </configfile>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
41 </configfiles>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
42 <inputs>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
43 <param name="infile" type="data" format="json" label="Graph data" help="JSON representation of graphs."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
44 <section name="options" title="Advanced Options" expanded="False">
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
45 <param argument="r" type="integer" optional="true" value="3" label="Maximum radius size" help="The maximal radius size in pairwise neighborhood. ( unit : node )"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
46 <param argument="d" type="integer" optional="true" value="3" label="Maximum distance size" help="The maximal distance size in pairwise neighborhood. ( unit : node )"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
47 <param argument="n" type="integer" optional="true" value="1" label="Maximum number of clusters" help="The maximal number of clusters used to discretize real label vectors."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
48 <param argument="min_r" type="integer" optional="true" value="0" label="Minimal radius size" help="The minimal radius size in pairwise neighborhood."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
49 <param argument="min_d" type="integer" optional="true" value="0" label="Minimal distance size" help="The minimal distance size in pairwise neighborhood."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
50 <param argument="min_n" type="integer" optional="true" value="2" label="Minimal number of clusters" help="The minimal number of clusters used to discretize real label vectors ( default : 2 )"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
51 <param argument="label_size" type="integer" optional="true" value="1" label="Label size" help="The number of discretization steps used in the conversion from real valued labels to discrete labels."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
52 <param argument="nbits" type="integer" optional="true" value="20" label="Feature space size" help="The number of bits that defines the feature space size: |feature space|=2^nbits"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
53 <param argument="normalization" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalization" help="Flag to set the resulting feature vector to have unit euclidean norm."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
54 <param argument="inner_normalization" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Combine radius and distance size to normalize" help="Flag to set the feature vector for a specific combination of the radius and distance size to have unit euclidean norm (default True).
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
55 When used together with the 'normalization' flag it will be applied first and then the resulting feature vector will be normalized."/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
56 <param argument="triangular_decomposition" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Process dense graphs" help=""/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
57 </section>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
58 </inputs>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
59 <outputs>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
60 <data format="txt" name="outfile"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
61 </outputs>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
62 <tests>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
63 <test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
64 <param name="infile" value="converter_result01.json" ftype="json"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
65 <output name="outfile" file="vectorizer_result01.mtx"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
66 </test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
67 <test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
68 <param name="infile" value="converter_result02.json" ftype="json"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
69 <output name="outfile" file="vectorizer_result02.mtx"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
70 </test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
71 <test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
72 <param name="infile" value="converter_result02.json" ftype="json"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
73 <param name="complexity" value="5"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
74 <param name="r" value="4"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
75 <output name="outfile" file="vectorizer_result03.mtx"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
76 </test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
77 <test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
78 <param name="infile" value="converter_result02.json" ftype="json"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
79 <param name="complexity" value="5"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
80 <param name="r" value="4"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
81 <output name="outfile" file="vectorizer_result04.mtx"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
82 </test>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
83 </tests>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
84 <help>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
85 <![CDATA[
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
86 **What it does**
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
87 This tool transforms real vector, labeled, weighted, nested graphs to sparse vectors.
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
88 For more information check http://dx.doi.org/10.5281/zenodo.15094
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
89 ]]>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
90 </help>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
91 <expand macro="eden_citation"/>
95482139106c planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
bgruening
parents:
diff changeset
92 </tool>