Mercurial > repos > bgruening > text_processing
diff multijoin.xml @ 0:ec66f9d90ef0 draft
initial uploaded
| author | bgruening |
|---|---|
| date | Thu, 05 Sep 2013 04:58:21 -0400 |
| parents | |
| children | a4ad586d1403 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multijoin.xml Thu Sep 05 04:58:21 2013 -0400 @@ -0,0 +1,122 @@ +<tool id="unixtools_multijoin'_tool" name="Multi-Join" version="0.1.1"> + <description>(combine multiple files)</description> + <command interpreter="perl">multijoin + --key '$key_column' + --values '$value_columns' + --filler '$filler' + $ignore_dups + $output_header + $input_header + #for $file in $files + '$file.filename' + #end for + > '$output' + </command> + + <inputs> + <repeat name="files" title="file to join"> + <param name="filename" label="Add file" type="data" format="txt" /> + </repeat> + + <param name="key_column" label="Common key column" type="integer" + value="1" help="Usually gene-ID or other common value" /> + + <param name="value_columns" label="Column with values to preserve" type="text" + value="2,3,4" help="Enter comma-separated list of columns, e.g. 3,6,8"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + + <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" /> + <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" /> + <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." /> + <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + + </inputs> + <outputs> + <data name="output" format="input" metadata_source="input1" /> + </outputs> + +<help> +**What it does** + +This tool joins multiple tabular files based on a common key column. + +----- + +**Example** + +To join three files, based on the 4th column, and keeping the 7th,8th,9th columns: + +**First file (AAA)**:: + + chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 + chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 + chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 + chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 + chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 + chr4 995793 996435 FBtr0111046 0 + 7 166 642 + chr4 995793 997931 FBtr0111044 0 + 28 683 2138 + chr4 995793 997931 FBtr0111045 0 + 28 683 2138 + chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 + ... + + +**Second File (BBB)**:: + + chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 + chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 + chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 + chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 + chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 + chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 + chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 + chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 + chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 + ... + +**Third file (CCC)**:: + + chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 + chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 + chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 + chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 + chr4 995793 996435 FBtr0111046 0 + 5 304 642 + chr4 995793 997931 FBtr0111044 0 + 17 714 2138 + chr4 995793 997931 FBtr0111045 0 + 17 714 2138 + chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 + ... + + +**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return:: + + key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9 + FBtr0089116 0 0 0 56 1296 15144 0 0 0 + FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 + FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 + FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 + FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 + FBtr0111044 28 683 2138 0 0 0 17 714 2138 + FBtr0111045 28 683 2138 0 0 0 17 714 2138 + FBtr0111046 7 166 642 0 0 0 5 304 642 + FBtr0300796 0 0 0 56 1296 14475 0 0 0 + ... + + +# Input files need not be sorted. + +----- + +*multijoin* was written by A. Gordon (gordon at cshl dot edu) + +</help> +</tool>
