Mercurial > repos > bgruening > text_processing

diff awk.xml @ 6:8928e6d1e7ba draft
Uploaded
author: bgruening
date: Thu, 08 Jan 2015 09:07:31 -0500
parents: 56e80527c482
children: d64eace4f9f3
--- a/awk.xml	Wed Jan 07 11:15:41 2015 -0500
+++ b/awk.xml	Thu Jan 08 09:07:31 2015 -0500
@@ -11,17 +11,17 @@
 <![CDATA[
         awk
             --sandbox
-            -v FS=\$'\t'
-            -v OFS=\$'\t'
+            -v FS='	'
+            -v OFS='	'
             --re-interval
-            -f '$awk_script'
-            "$input"
-        > "$output"
+            -f "$awk_script"
+            "$infile"
+        > "$outfile"
 ]]>
     </command>
     <inputs>
-        <param format="txt" name="input" type="data" label="File to process" />
-        <param name="url_paste" type="text" area="true" size="5x35" label="AWK Program" help="">
+        <param name="infile" format="txt" type="data" label="File to process" />
+        <param name="code" type="text" area="true" size="5x35" label="AWK Program" help="">
             <sanitizer>
                 <valid initial="string.printable">
                     <remove value="&apos;"/>
@@ -30,21 +30,19 @@
         </param>
     </inputs>
     <configfiles>
-        <configfile name="awk_script">
-            $url_paste
-        </configfile>
+        <configfile name="awk_script">$code</configfile>
     </configfiles>
     <outputs>
-        <data format="input" name="output" metadata_source="input"/>
+        <data name="outfile" format_source="infile" metadata_source="infile"/>
     </outputs>
     <tests>
-      <test>
-          <param name="input" value="unix_awk_input1.txt" />
-          <param name="awk_script" value="$2>0.5 { print $2*9, $1 }" />
-          <output name="output" file="unix_awk_output1.txt" />
-      </test>
+        <test>
+            <param name="infile" value="awk1.txt" />
+            <!-- Commas are not allowed in a value field; a value containing a comma will be split. -->
+            <param name="code" value='$2>0.5 { print $2*9"\t"$1 }' />
+            <output name="outfile" file="awk_results1.txt" />
+        </test>
     </tests>
-
     <help>
 <![CDATA[
 **What it does**
@@ -78,18 +76,16 @@
     pattern { action 1; action 2; action 3; }
 
 
-
 **Pattern Examples**
 
 - **$2 == "chr3"**  will match lines whose second column is the string 'chr3'
 - **$5-$4>23**  will match lines where subtracting the value of the fourth column from the value of the fifth column gives a value larger than 23.
 - **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characters AG followed by any two characters followed by AG). (This is the way to specify regular expressions on the entire line, similar to GREP.)
 - **$7 ~ /A{4}U/**  will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.)
-- **10000 &lt; $4 &amp;&amp; $4 &lt; 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000
+- **10000 < $4 && $4 < 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000
 - If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines).
 
 
-
 **Action Examples**
 
 - **{ print }** or **{ print $0 }**   will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'.
@@ -98,7 +94,6 @@
 - If no action part is specified (not even the curly brackets) - the default action is to print the entire line.
 
 
-
 **AWK's Regular Expression Syntax**
 
 The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
author	bgruening
date	Thu, 08 Jan 2015 09:07:31 -0500
parents	56e80527c482
children	d64eace4f9f3