annotate easyjoin.xml @ 0:ec66f9d90ef0 draft

initial uploaded
author bgruening
date Thu, 05 Sep 2013 04:58:21 -0400
parents
children 7068d1548234
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
1 <tool id="unixtools_easyjoin_tool" name="Join" version="0.1.1">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
2 <requirements>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
3 <requirement type="package" version="8.21">gnu_coreutils</requirement>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
4 </requirements>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
5 <description>two files</description>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
6 <command interpreter="perl">easyjoin $jointype
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
7 -t ' '
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
8 $header
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
9 -e '$empty_string_filler'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
10 -o auto
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
11 $ignore_case
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
12 -1 '$column1'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
13 -2 '$column2'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
14 "$input1" "$input2"
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
15 &gt; '$output'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
16 </command>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
17
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
18 <inputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
19 <param format="txt" name="input1" type="data" label="1st file" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
20 <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="input1" accept_default="true" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
21
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
22 <param format="txt" name="input2" type="data" label="2nd File" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
23 <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="input2" accept_default="true" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
24
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
25 <param name="jointype" type="select" label="Output lines appearing in">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
26 <option value=" ">BOTH 1st &amp; 2nd file.</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
27 <option value="-v 1">1st but not in 2nd file. [-v 1]</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
28 <option value="-v 2">2nd but not in 1st file. [-v 2]</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
29 <option value="-a 1">both 1st &amp; 2nd file, plus unpairable lines from 1st file. [-a 1]</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
30 <option value="-a 2">both 1st &amp; 2nd file, plus unpairable lines from 2nd file. [-a 2]</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
31 <option value="-a 1 -a 2">All Lines [-a 1 -a 2]</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
32 <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
33 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
34
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
35 <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
36
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
37 <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
38
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
39 <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
40 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
41 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
42 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
43 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
44 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
45 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
46
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
47 </inputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
48 <outputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
49 <data name="output" format="input" metadata_source="input1"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
50 </outputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
51
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
52 <help>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
53 **What it does**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
54
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
55 This tool joins two tabular files based on a common key column.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
56
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
57 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
58
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
59 **Example**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
60
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
61 **First file**::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
62
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
63 Fruit Color
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
64 Apple red
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
65 Banana yellow
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
66 Orange orange
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
67 Melon green
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
68
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
69 **Second File**::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
70
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
71 Fruit Price
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
72 Orange 7
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
73 Avocado 8
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
74 Apple 4
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
75 Banana 3
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
76
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
77 **Joining** both files, using **key column 1**, a **header line**, the **All Lines** option and a period (**.**) as the value for unpaired fields, will return::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
78
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
79 Fruit Color Price
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
80 Apple red 4
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
81 Avocado . 8
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
82 Banana yellow 3
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
83 Melon green .
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
84 Orange orange 7
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
85
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
86 # Input files need not be sorted.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
87 # The header line (**Fruit Color Price**) was joined and kept as the first line.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
88 # Missing values (e.g. Avocado's color, missing from the first file) are replaced with the chosen filler value (a period character in this example).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
89
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
90 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
91
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
92 *easyjoin* was written by A. Gordon
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
93
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
94 </help>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
95 </tool>