annotate awk.xml @ 21:0e3b611245f7 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 9c4d4fe09cbbd818532d793d01d1cb16edbd496b-dirty"
author bgruening
date Sun, 15 Mar 2020 22:58:18 +0000
parents 1aa30b2c73c9
children 97a131fbeef4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
1e974b82380d planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 0ba37c1f33eeb1c77b4d9363d681fe522d9f7fe7
bgruening
parents: 15
diff changeset
1 <tool id="tp_awk_tool" name="Text reformatting" version="@BASE_VERSION@.1">
2
fc862d5bccaf Uploaded
bgruening
parents: 1
diff changeset
2 <description>with awk</description>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
3 <macros>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
4 <import>macros.xml</import>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
5 </macros>
13
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 12
diff changeset
6 <requirements>
18
1e974b82380d planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 0ba37c1f33eeb1c77b4d9363d681fe522d9f7fe7
bgruening
parents: 15
diff changeset
7 <requirement type="package" version="4.2.0">gawk</requirement>
13
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 12
diff changeset
8 </requirements>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
9 <version_command>awk --version | head -n 1</version_command>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
10 <command>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
11 <![CDATA[
20
1aa30b2c73c9 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 4f79443517baf378fbfe1f81be361d97f2938601
bgruening
parents: 18
diff changeset
12 env -i
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
13 awk
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
14 --sandbox
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
15 -v FS=' '
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
16 -v OFS=' '
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
17 --re-interval
21
0e3b611245f7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 9c4d4fe09cbbd818532d793d01d1cb16edbd496b-dirty"
bgruening
parents: 20
diff changeset
18 -f '$awk_script'
0e3b611245f7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 9c4d4fe09cbbd818532d793d01d1cb16edbd496b-dirty"
bgruening
parents: 20
diff changeset
19 '$infile'
0e3b611245f7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 9c4d4fe09cbbd818532d793d01d1cb16edbd496b-dirty"
bgruening
parents: 20
diff changeset
20 > '$outfile'
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
21 ]]>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
22 </command>
13
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 12
diff changeset
23 <configfiles>
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 12
diff changeset
24 <configfile name="awk_script">$code</configfile>
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 12
diff changeset
25 </configfiles>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
26 <inputs>
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
27 <param name="infile" format="txt" type="data" label="File to process" />
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
28 <param name="code" type="text" area="true" size="5x35" label="AWK Program" help="">
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
29 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
30 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
31 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
32 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
33 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
34 </param>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
35 </inputs>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
36 <outputs>
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
37 <data name="outfile" format_source="infile" metadata_source="infile"/>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
38 </outputs>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
39 <tests>
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
40 <test>
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
41 <param name="infile" value="awk1.txt" />
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
42 <!-- commas are not allowed in a value field. Values containing a comma will be split -->
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
43 <param name="code" value='$2>0.5 { print $2*9"\t"$1 }' />
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
44 <output name="outfile" file="awk_results1.txt" />
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
45 </test>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
46 </tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
47 <help>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
48 <![CDATA[
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
49 **What it does**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
50
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
51 This tool runs the unix **awk** command on the selected data file.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
52
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
53 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
54
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
55 **TIP:**
1
a4ad586d1403 Uploaded
bgruening
parents: 0
diff changeset
56
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
57 This tool uses the **extended regular** expression syntax (not the perl syntax).
1
a4ad586d1403 Uploaded
bgruening
parents: 0
diff changeset
58 **\\d**, **\\w**, **\\s** etc. are **not** supported.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
59
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
60
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
61 **Further reading**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
62
14
7725ab6dab67 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 13
diff changeset
63 - Awk by Example (http://www.ibm.com/developerworks/linux/library/l-awk1/index.html)
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
64 - Long AWK tutorial (http://www.grymoire.com/Unix/Awk.html)
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
65
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
66 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
67
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
68 **AWK programs**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
69
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
70 Most AWK programs consist of **patterns** (i.e. rules that match lines of text) and **actions** (i.e. commands to execute when a pattern matches a line).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
71
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
72 The basic form of AWK program is::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
73
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
74 pattern { action 1; action 2; action 3; }
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
75
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
76
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
77 **Pattern Examples**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
78
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
79 - **$2 == "chr3"** will match lines whose second column is the string 'chr3'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
80 - **$5-$4>23** will match lines where subtracting the value of the fourth column from the value of the fifth column gives a value larger than 23.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
81 - **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characters AG followed by any two characters followed by AG). (This is the way to specify regular expressions on the entire line, similar to GREP.)
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
82 - **$7 ~ /A{4}U/** will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.)
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
83 - **10000 < $4 && $4 < 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000
18
1e974b82380d planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 0ba37c1f33eeb1c77b4d9363d681fe522d9f7fe7
bgruening
parents: 15
diff changeset
84 - **BEGIN** will be executed once only, before the first input record is read.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
85 - If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
86
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
87
15
74aae7d6cb09 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
bgruening
parents: 14
diff changeset
88
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
89 **Action Examples**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
90
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
91 - **{ print }** or **{ print $0 }** will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
92 - **{ print $1, $4, $5 }** will print only the first, fourth and fifth fields of the input line.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
93 - **{ print $4, $5-$4 }** will print the fourth column and the difference between the fifth and fourth column. (If the fourth column was start-position in the input file, and the fifth column was end-position - the output file will contain the start-position, and the length).
15
74aae7d6cb09 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
bgruening
parents: 14
diff changeset
94 - **{ FS = "," }** can be used to change the field separator (delimiter) for parsing the input file.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
95 - If no action part is specified (not even the curly brackets) - the default action is to print the entire line.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
96
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
97
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
98 **AWK's Regular Expression Syntax**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
99
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
100 The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
101
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
102 - **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
103 - **^** matches the beginning of a string (but not an internal line).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
104 - **(** .. **)** groups a particular pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
105 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
106
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
107 - **{n}** The preceding item is matched exactly n times.
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
108 - **{n,}** The preceding item is matched n or more times.
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
109 - **{n,m}** The preceding item is matched at least n times but not more than m times.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
110
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
111 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
112 - **.** Matches any single character except a newline.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
113 - ***** The preceding item will be matched zero or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
114 - **?** The preceding item is optional and matched at most once.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
115 - **+** The preceding item will be matched one or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
116 - **^** has two meanings:
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
117 - matches the beginning of a line or string.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
118 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
119 - **$** matches the end of a line or string.
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
120 - **\|** Separates alternate possibilities.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
121
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
122 @REFERENCES@
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
123 ]]>
1
a4ad586d1403 Uploaded
bgruening
parents: 0
diff changeset
124 </help>
14
7725ab6dab67 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 13
diff changeset
125 <expand macro="citations" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
126 </tool>