Mercurial > repos > jjohnson > split_to_collection
comparison split_tabular_to_collection.xml @ 0:f6254e4e155e draft default tip
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/split_to_collection commit b2ce04dd96d8b00103c23b58a4c6539a6b30809a-dirty
| author | jjohnson |
|---|---|
| date | Thu, 26 Oct 2017 13:32:38 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f6254e4e155e |
|---|---|
| 1 <tool id="split_tabular_to_collection" name="Split Tabular into Collection" version="0.1.0"> | |
| 2 <description>by lines</description> | |
| 3 <!-- Splits one tabular dataset into consecutive chunks of a fixed number of lines. Every chunk file the command writes is collected as one element of the output list collection. --> | |
| 4 <command><![CDATA[ | |
| 5 ## Zero-pad the numeric suffix to the digit count of the data_lines metadata value. | |
| 6 #set $width = len(str($input.dataset.metadata.data_lines)) | |
| 7 ## Number of leading lines to drop: the comment_lines metadata value when skipping is enabled, otherwise none. | |
| 8 #if $skip_comment_lines: | |
| 9 #set $skip = $input.dataset.metadata.comment_lines | |
| 10 #else | |
| 11 #set $skip = 0 | |
| 12 #end if | |
| 13 ## awk line numbers (NR) start at 1; subtracting the offset gives the first kept line index 0. | |
| 14 #set $offset = $skip + 1 | |
| 15 ## The dataset name is user-controlled and ends up inside a single-quoted shell word, a double-quoted awk string and a file path. Replace the four characters that could break out of those contexts (single quote, double quote, backslash, slash; character codes 39, 34, 92, 47) and leave every other character untouched so existing element names do not change. | |
| 16 #set $prefix = $input.name | |
| 17 #for $ch in [chr(39), chr(34), chr(92), chr(47)] | |
| 18 #set $prefix = $prefix.replace($ch, '_') | |
| 19 #end for | |
| 20 ## Chunks are contiguous, so each output file is closed as soon as the next one starts; this keeps a single file open no matter how many chunks are produced. The awk program stays on one line and uses explicit semicolons so it does not depend on newlines surviving command assembly. | |
| 21 awk 'NR > ${skip} { out = sprintf("%s_%.${width}d", "${prefix}", int((NR - ${offset}) / ${lines}) * ${lines}); if (out != prev) { if (prev != "") close(prev); prev = out } print \$0 > out }' '$input' | |
| 22 ]]></command> | |
| 23 <inputs> | |
| 24 <param name="input" type="data" format="tabular" label="Tabular dataset to split"/> | |
| 25 <param name="lines" type="integer" value="1000" min="1" label="Number of lines per output dataset" help="The last dataset holds the remaining lines and may be shorter."/> | |
| 26 <param name="skip_comment_lines" type="boolean" truevalue="yes" falsevalue="no" checked="true" | |
| 27 label="Skip comment lines" help="When enabled, the leading comment lines counted in the dataset metadata are left out of the output and do not count towards the chunk size."/> | |
| 28 </inputs> | |
| 29 <outputs> | |
| 30 <!-- Files written by the command are discovered by name; the whole file name is used as the element identifier. --> | |
| 31 <collection name="output_set" type="list" label="${input.name} Split List"> | |
| 32 <discover_datasets pattern="__name__" ext="tabular" visible="false"/> | |
| 33 </collection> | |
| 34 </outputs> | |
| 35 <tests> | |
| 36 <!-- With 20 lines per chunk, kept line 20 is the last line of the first chunk (suffix 00) and kept line 21 is the first line of the second chunk (suffix 20). NOTE(review): input.tsv is not visible here; the assertions imply its rows are numbered from 1, please confirm. --> | |
| 37 <test> | |
| 38 <param name="input" value="input.tsv" ftype="tabular"/> | |
| 39 <param name="lines" value="20"/> | |
| 40 <output_collection name="output_set" type="list"> | |
| 41 <element name="input.tsv_00"> | |
| 42 <assert_contents> | |
| 43 <has_text_matching expression="20\tt\tT" /> | |
| 44 </assert_contents> | |
| 45 </element> | |
| 46 <element name="input.tsv_20"> | |
| 47 <assert_contents> | |
| 48 <has_text_matching expression="21\tu\tU" /> | |
| 49 </assert_contents> | |
| 50 </element> | |
| 51 </output_collection> | |
| 52 </test> | |
| 53 </tests> | |
| 54 <help><![CDATA[ | |
| 55 Splits a tabular dataset into multiple datasets in a dataset collection. | |
| 56 This can be used in a workflow to process datasets in the collection in parallel. | |
| 57 | |
| 58 Each dataset in the collection is named after the input dataset, followed by an underscore and the zero-padded index of its first line (counting from 0 after any skipped comment lines). | |
| 59 | |
| 60 ]]></help> | |
| 61 </tool> | |
