Mercurial > repos > iuc > collection_column_join
comparison collection_column_join.xml @ 0:7046aa921fed draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 3b918f5a99ea13ec5acc7cc5fdd310fadb773ac0
| author | iuc |
|---|---|
| date | Thu, 26 May 2016 16:34:38 -0400 |
| parents | |
| children | 53fb5ce7ad1f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7046aa921fed |
|---|---|
| 1 <tool id="collection_column_join" name="Column Join" version="0.0.1"> | |
| 2 <description> | |
| 3 on Collections | |
| 4 </description> | |
| 5 <requirements> | |
| 6 <requirement type="package" version="8.22">gnu_coreutils</requirement> | |
| 7 </requirements> | |
| 8 <stdio> | |
| 9 <exit_code range="1:" /> | |
| 10 <exit_code range=":-1" /> | |
| 11 </stdio> | |
| 12 <command><![CDATA[ | |
| 13 #if "output_shell_script" in str( $include_outputs ).split( "," ): | |
| 14 cp "${collection_column_join_script}" "${script_output}" && | |
| 15 #end if | |
| 16 sh "${collection_column_join_script}" | |
| 17 ]]> | |
| 18 </command> | |
| 19 <configfiles> <!-- Generated sh script: sorts each input on the identifier column, outer-joins the inputs one after another (alternating between output0.tmp and output1.tmp), then writes the pasted header line followed by the joined rows. --> | |
| 20 <configfile name="collection_column_join_script"><![CDATA[ | |
| 21 #!/bin/sh | |
| 22 touch header0.tmp && | |
| 23 touch output0.tmp && | |
| 24 #set $delimiter = '\t' | |
| 25 #set $left_identifier_column = $identifier_column | |
| 26 #set $tail_offset = int( str( $has_header ) ) + 1 | |
| 27 #for $i, $tabular_item in enumerate( $input_tabular ): | |
| 28 #if $has_header: | |
| 29 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && | |
| 30 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k ${identifier_column},${identifier_column} > input_file.tmp && | |
| 31 #else: | |
| 32 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && | |
| 33 LC_ALL=C sort -t "${delimiter}" -k ${identifier_column},${identifier_column} "${tabular_item}" > input_file.tmp && | |
| 34 #end if | |
| 35 #if $i == 0: | |
| 36 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp && | |
| 37 #if $has_header: | |
| 38 awk -F "${delimiter}" '{ printf "%s", \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp && | |
| 39 #else: | |
| 40 echo "#KEY" > header${ $i % 2 }.tmp && | |
| 41 #end if | |
| 42 #else: | |
| 43 LC_ALL=C join -o auto -a 1 -a 2 -1 ${left_identifier_column} -2 ${identifier_column} -t "${delimiter}" -e "${fill_char}" output${ $i % 2 }.tmp input_file.tmp > output${ ( $i + 1 ) % 2 }.tmp && | |
| 44 #set $left_identifier_column = 1 | |
| 45 #end if | |
| 46 paste -d "${delimiter}" header${ $i % 2 }.tmp input_header.tmp > header${ ( $i + 1 ) % 2 }.tmp && | |
| 47 #end for | |
| 48 cat header${ ( $i + 1 ) % 2 }.tmp output${ ( $i + 1 ) % 2 }.tmp > "${tabular_output}" | |
| 49 ]]> | |
| 50 </configfile> | |
| 51 </configfiles> | |
| 52 <inputs> | |
| 53 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/> | |
| 54 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> --> | |
| 55 <param name="identifier_column" type="integer" value="1" min="1" optional="False" label="Identifier column" help="1-based index of the column to join on; the sort, join and awk steps all count fields from 1."/> | |
| 56 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item" help="This many leading lines are removed from each file before sorting; use 0 when the files have no header."/> | |
| 57 <param name="fill_char" type="text" value="." optional="False" label="Fill character" help="Written in place of values that are missing for an identifier in one of the files."/> | |
| 58 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create"> | |
| 59 <option value="output_shell_script" selected="false">Shell script</option> | |
| 60 </param> | |
| 61 </inputs> | |
| 62 <outputs> | |
| 63 <data format="tabular" name="tabular_output"/> | |
| 64 <data format="txt" name="script_output"> | |
| 65 <filter>include_outputs and "output_shell_script" in include_outputs</filter> | |
| 66 </data> | |
| 67 </outputs> | |
| 68 <tests> | |
| 69 <test> | |
| 70 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> | |
| 71 <param name="identifier_column" value="1"/> | |
| 72 <param name="has_header" value="1"/> | |
| 73 <param name="fill_char" value="."/> | |
| 74 <param name="include_outputs" /> | |
| 75 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/> | |
| 76 </test> | |
| 77 <test> | |
| 78 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> | |
| 79 <param name="identifier_column" value="1"/> | |
| 80 <param name="has_header" value="0"/> | |
| 81 <param name="fill_char" value="."/> | |
| 82 <param name="include_outputs" /> | |
| 83 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/> | |
| 84 </test> | |
| 85 </tests> | |
| 86 <help> | |
| 87 <![CDATA[ | |
| 88 Joins lists of tabular datasets together on a field. | |
| 89 | |
| 90 ----- | |
| 91 | |
| 92 **Example** | |
| 93 | |
| 94 To join three files, with headers, based on the first column: | |
| 95 | |
| 96 **First file (in_1.tabular)**:: | |
| 97 | |
| 98 #KEY c2 c3 c4 | |
| 99 one 1-1 1-2 1-3 | |
| 100 two 1-4 1-5 1-6 | |
| 101 three 1-7 1-8 1-9 | |
| 102 | |
| 103 | |
| 104 **Second File (in_2.tabular)**:: | |
| 105 | |
| 106 #KEY c2 c3 c4 | |
| 107 one 2-1 2-2 2-3 | |
| 108 two 2-4 2-5 2-6 | |
| 109 three 2-7 2-8 2-9 | |
| 110 | |
| 111 **Third file (in_3.tabular)**:: | |
| 112 | |
| 113 #KEY c2 c3 c4 | |
| 114 one 3-3 3-2 3-3 | |
| 115 two 3-4 3-5 3-6 | |
| 116 three 3-7 3-8 3-9 | |
| 117 | |
| 118 | |
| 119 **Joining** the files, using an **identifier column of 1** and **1 header line**, will return:: | |
| 120 | |
| 121 #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4 | |
| 122 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 | |
| 123 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 | |
| 124 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 | |
| 125 | |
| 126 ]]> | |
| 127 </help> | |
| 128 <citations> | |
| 129 </citations> | |
| 130 </tool> |
