collection_column_join: collection_column

comparison collection_column_join.xml @ 3:a01da64e34a1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 9f1c3ab3f41bab8ff962aca1478c75e538e5bf6a

author	iuc
date	Fri, 06 Apr 2018 03:44:04 -0400
parents	e1ce461b6dae
children	b9d49988b597

comparison

equal deleted inserted replaced

-:e1ce461b6dae
+:a01da64e34a1
-<tool id="collection_column_join" name="Column Join" version="0.0.2">
+<tool id="collection_column_join" name="Column Join" version="0.0.3">
 <description>on Collections</description>
 <requirements>
 <requirement type="package" version="8.25">coreutils</requirement>
 </requirements>
 <command detect_errors="exit_code"><![CDATA[
 touch output0.tmp &&
 #set $delimiter = '\t'
 #set $left_identifier_column = $identifier_column
 #set $tail_offset = int( str( $has_header ) ) + 1
 #for $i, $tabular_item in enumerate( $input_tabular ):
-#if $has_header:
+#if $old_col_in_header:
-head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
+#if $has_header:
-tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
+tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+#else:
+awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
+LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+#end if
 #else:
-awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
+#if $has_header:
-LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}" ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
+tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+#else:
+awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}"); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
+LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+#end if
 #end if
 #if $i == 0:
 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp &&
 #if $has_header:
 awk '{ printf \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp &&
 <inputs>
 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/>
 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> -->
 <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/>
 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/>
+<param name="old_col_in_header" type="boolean" checked="true" label="Keep original column header" help="Disable if you want column headers to be composed only of the input dataset names"/>
 <param name="fill_char" type="text" value="." optional="False" label="Fill character"/>
 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create">
 <option value="output_shell_script" selected="false">Shell script</option>
 </param>
 </inputs>
 <tests>
 <test>
 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
 <param name="identifier_column" value="1"/>
 <param name="has_header" value="1"/>
+<param name="old_col_in_header" value="true"/>
 <param name="fill_char" value="."/>
 <param name="include_outputs" />
 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/>
 </test>
 <test>
 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
 <param name="identifier_column" value="1"/>
 <param name="has_header" value="0"/>
+<param name="old_col_in_header" value="true"/>
 <param name="fill_char" value="."/>
 <param name="include_outputs" />
 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/>
+</test>
+<test>
+<param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
+<param name="identifier_column" value="1"/>
+<param name="has_header" value="1"/>
+<param name="old_col_in_header" value="false"/>
+<param name="fill_char" value="."/>
+<param name="include_outputs" />
+<output name="tabular_output" file="out_3.tabular" ftype="tabular"/>
+</test>
+<test>
+<param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
+<param name="identifier_column" value="1"/>
+<param name="has_header" value="0"/>
+<param name="old_col_in_header" value="false"/>
+<param name="fill_char" value="."/>
+<param name="include_outputs" />
+<output name="tabular_output" file="out_4.tabular" ftype="tabular"/>
 </test>
 </tests>
 <help>
 <![CDATA[
 Joins lists of tabular datasets together on a field.
 #KEY    in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4
 one     1-1              1-2            1-3             2-1              2-2             2-3             3-3             3-2             3-3
 three   1-7              1-8            1-9             2-7              2-8             2-9             3-7             3-8             3-9
 two     1-4              1-5            1-6             2-4              2-5             2-6             3-4             3-5             3-6
+**Joining** the files, using an **identifier column of 1** and **1 header line**, but disabling **Keep original column header**, will return::
+#KEY    in_1.tabular in_1.tabular in_1.tabular in_2.tabular in_2.tabular in_2.tabular in_3.tabular in_3.tabular in_3.tabular
+one     1-1              1-2            1-3             2-1              2-2             2-3             3-3             3-2             3-3
+three   1-7              1-8            1-9             2-7              2-8             2-9             3-7             3-8             3-9
+two     1-4              1-5            1-6             2-4              2-5             2-6             3-4             3-5             3-6
 ]]>
 </help>
 <citations>
 </citations>
 </tool>

Mercurial > repos > iuc > collection_column_join

comparison collection_column_join.xml @ 3:a01da64e34a1 draft