Mercurial > repos > iuc > collection_column_join
comparison collection_column_join.xml @ 3:a01da64e34a1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 9f1c3ab3f41bab8ff962aca1478c75e538e5bf6a
| author | iuc |
|---|---|
| date | Fri, 06 Apr 2018 03:44:04 -0400 |
| parents | e1ce461b6dae |
| children | b9d49988b597 |
comparison
equal
deleted
inserted
replaced
| 2:e1ce461b6dae | 3:a01da64e34a1 |
|---|---|
| 1 <tool id="collection_column_join" name="Column Join" version="0.0.2"> | 1 <tool id="collection_column_join" name="Column Join" version="0.0.3"> |
| 2 <description>on Collections</description> | 2 <description>on Collections</description> |
| 3 <requirements> | 3 <requirements> |
| 4 <requirement type="package" version="8.25">coreutils</requirement> | 4 <requirement type="package" version="8.25">coreutils</requirement> |
| 5 </requirements> | 5 </requirements> |
| 6 <command detect_errors="exit_code"><![CDATA[ | 6 <command detect_errors="exit_code"><![CDATA[ |
| 17 touch output0.tmp && | 17 touch output0.tmp && |
| 18 #set $delimiter = '\t' | 18 #set $delimiter = '\t' |
| 19 #set $left_identifier_column = $identifier_column | 19 #set $left_identifier_column = $identifier_column |
| 20 #set $tail_offset = int( str( $has_header ) ) + 1 | 20 #set $tail_offset = int( str( $has_header ) ) + 1 |
| 21 #for $i, $tabular_item in enumerate( $input_tabular ): | 21 #for $i, $tabular_item in enumerate( $input_tabular ): |
| 22 #if $has_header: | 22 #if $old_col_in_header: |
| 23 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && | 23 #if $has_header: |
| 24 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && | 24 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && |
| 25 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && | |
| 26 #else: | |
| 27 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && | |
| 28 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && | |
| 29 #end if | |
| 25 #else: | 30 #else: |
| 26 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && | 31 #if $has_header: |
| 27 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && | 32 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}" ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && |
| 33 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && | |
| 34 #else: | |
| 35 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}"); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && | |
| 36 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && | |
| 37 #end if | |
| 28 #end if | 38 #end if |
| 29 #if $i == 0: | 39 #if $i == 0: |
| 30 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp && | 40 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp && |
| 31 #if $has_header: | 41 #if $has_header: |
| 32 awk '{ printf \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp && | 42 awk '{ printf \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp && |
| 46 <inputs> | 56 <inputs> |
| 47 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/> | 57 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/> |
| 48 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> --> | 58 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> --> |
| 49 <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/> | 59 <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/> |
| 50 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/> | 60 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/> |
| 61 <param name="old_col_in_header" type="boolean" checked="true" label="Keep original column header" help="Disable if you want column headers to be composed only of the input dataset names"/> | |
| 51 <param name="fill_char" type="text" value="." optional="False" label="Fill character"/> | 62 <param name="fill_char" type="text" value="." optional="False" label="Fill character"/> |
| 52 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create"> | 63 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create"> |
| 53 <option value="output_shell_script" selected="false">Shell script</option> | 64 <option value="output_shell_script" selected="false">Shell script</option> |
| 54 </param> | 65 </param> |
| 55 </inputs> | 66 </inputs> |
| 62 <tests> | 73 <tests> |
| 63 <test> | 74 <test> |
| 64 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> | 75 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> |
| 65 <param name="identifier_column" value="1"/> | 76 <param name="identifier_column" value="1"/> |
| 66 <param name="has_header" value="1"/> | 77 <param name="has_header" value="1"/> |
| 78 <param name="old_col_in_header" value="true"/> | |
| 67 <param name="fill_char" value="."/> | 79 <param name="fill_char" value="."/> |
| 68 <param name="include_outputs" /> | 80 <param name="include_outputs" /> |
| 69 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/> | 81 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/> |
| 70 </test> | 82 </test> |
| 71 <test> | 83 <test> |
| 72 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> | 84 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> |
| 73 <param name="identifier_column" value="1"/> | 85 <param name="identifier_column" value="1"/> |
| 74 <param name="has_header" value="0"/> | 86 <param name="has_header" value="0"/> |
| 87 <param name="old_col_in_header" value="true"/> | |
| 75 <param name="fill_char" value="."/> | 88 <param name="fill_char" value="."/> |
| 76 <param name="include_outputs" /> | 89 <param name="include_outputs" /> |
| 77 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/> | 90 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/> |
| 91 </test> | |
| 92 <test> | |
| 93 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> | |
| 94 <param name="identifier_column" value="1"/> | |
| 95 <param name="has_header" value="1"/> | |
| 96 <param name="old_col_in_header" value="false"/> | |
| 97 <param name="fill_char" value="."/> | |
| 98 <param name="include_outputs" /> | |
| 99 <output name="tabular_output" file="out_3.tabular" ftype="tabular"/> | |
| 100 </test> | |
| 101 <test> | |
| 102 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> | |
| 103 <param name="identifier_column" value="1"/> | |
| 104 <param name="has_header" value="0"/> | |
| 105 <param name="old_col_in_header" value="false"/> | |
| 106 <param name="fill_char" value="."/> | |
| 107 <param name="include_outputs" /> | |
| 108 <output name="tabular_output" file="out_4.tabular" ftype="tabular"/> | |
| 78 </test> | 109 </test> |
| 79 </tests> | 110 </tests> |
| 80 <help> | 111 <help> |
| 81 <![CDATA[ | 112 <![CDATA[ |
| 82 Joins lists of tabular datasets together on a field. | 113 Joins lists of tabular datasets together on a field. |
| 115 #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4 | 146 #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4 |
| 116 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 | 147 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 |
| 117 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 | 148 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 |
| 118 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 | 149 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 |
| 119 | 150 |
| 151 | |
| 152 **Joining** the files, using an **identifier column of 1** and **1 header line**, but disabling **Keep original column header**, will return:: | |
| 153 | |
| 154 #KEY in_1.tabular in_1.tabular in_1.tabular in_2.tabular in_2.tabular in_2.tabular in_3.tabular in_3.tabular in_3.tabular | |
| 155 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 | |
| 156 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 | |
| 157 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 | |
| 158 | |
| 120 ]]> | 159 ]]> |
| 121 </help> | 160 </help> |
| 122 <citations> | 161 <citations> |
| 123 </citations> | 162 </citations> |
| 124 </tool> | 163 </tool> |
