Mercurial > repos > iuc > obi_grep
changeset 5:16dc1dbb4cb2 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/obitools commit a01e3c562cf5d62af522b893a25abde6476a1f45
| author | iuc |
|---|---|
| date | Thu, 30 Oct 2025 16:01:34 +0000 |
| parents | 63b47deab5ff |
| children | |
| files | illuminapairedend.xml.orig macros.xml.orig ngsfilter.xml.orig obiannotate.xml.orig obiclean.xml.orig obiconvert.xml.orig obigrep.xml obigrep.xml.orig obisort.xml.orig obistat.xml.orig obitab.xml.orig obiuniq.xml.orig test-data/input_ngsfilter_extrafile.txt |
| diffstat | 13 files changed, 50 insertions(+), 2467 deletions(-) [+] |
line wrap: on
line diff
--- a/illuminapairedend.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,169 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_illumina_pairend" name="Illuminapairedend" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>Construct consensus reads from Illumina pair-end reads</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command> - <![CDATA[ - #if $inputfastq3p.ext.endswith(".gz") - gunzip -c '$inputfastq3p' > fastq3p.fastq && - gunzip -c '$inputfastq5p' > fastq5p.fastq && - #else - ln -s '$inputfastq3p' fastq3p.fastq && - ln -s '$inputfastq5p' fastq5p.fastq && - #end if - - illuminapairedend - ##--index-file= - #if $inputfastq3p.ext.startswith("fastqsolexa") - ##input file is in fastq nucleic format produced by solexa sequencer - --solexa - #else if $inputfastq3p.ext.startswith("fastqillumina") - ##input file is in fastq nucleic format produced by solexa sequencer - --illumina - #else - ## input file is in sanger fastq nucleic format (standard fastq) - --sanger - #end if - --without-progress-bar - --score-min='$score' - -r fastq3p.fastq - fastq5p.fastq - #if $inputfastq3p.ext.endswith(".gz") - | gzip -c - #end if - > '$output' - ]]> - </command> - <inputs> - <param name="inputfastq3p" type="data" format="fastq,fastq.gz" label="Read from file" help="file of 3p (1:) Illumina pair-end reads to assemble in sanger fastq nucleic format (standard fastq)" /> - <param name="inputfastq5p" type="data" format="fastq,fastq.gz" label="Read from file" help="file of 5p (2:) Illumina pair-end reads to assemble in sanger fastq nucleic format (standard fastq)" /> - <param name="score" type="float" value="40.0" label="minimum score for keeping aligment"/> - </inputs> - <outputs> - <data name="output" format_source="inputfastq3p" label="${tool.name} on ${on_string}: assembly results" /> - </outputs> - - <tests> - <test> - <param name="inputfastq3p" value="wolf_small.F.fastq" ftype="fastqsanger" /> - <param name="inputfastq5p" value="wolf_small.R.fastq" ftype="fastqsanger" /> - <param name="score" value="40.0" /> - <output name="output" file="illuminapairedend.output.fastq" ftype="fastqsanger" /> - </test> - <test> - <param name="inputfastq3p" value="wolf_small.F.fastq.gz" ftype="fastqsanger.gz" /> - <param name="inputfastq5p" value="wolf_small.R.fastq.gz" ftype="fastqsanger.gz" /> - <param name="score" value="40.0" /> - <output name="output" file="illuminapairedend.output.fastq.gz" ftype="fastqsanger.gz" decompress="true"/> - </test> - </tests> - - <help><![CDATA[ - -.. class:: warning - -**Warning:** -Sequence records corresponding to the same read pair must be in the same order in the two files - --------- - -.. class:: infomark - -**What it does** - -illuminapairedend aims at aligning the two reads of a pair-end library sequenced using an Illumina platform : - -\* If the two reads overlap, it returns the consensus sequence together with its quality -\* Otherwise, it concatenates sequence merging the forward read and the reversed-complemented reverse read. - -The program uses as input one or two fastq sequences reads files. - -\* If two files are used one of them must be specified using the -r option. Sequence records corresponding to the same read pair must be in the same order in the two files. -\* If just one file is provided, sequence records are supposed to be all of the same length. The first half of th e sequence is used as forward read, the second half is used as the reverse read. - -illuminapairedend align the forward sequence record with the reverse complement of the reverse sequence record. The alignment algorithm takes into account the base qualities. - -@OBITOOLS_LINK@ - -]]> - </help> - <expand macro="citation" /> - -</tool> -======= -<tool id="obi_illumina_pairend" name="Illuminapairedend - Assembling pair-end reads" version="@TOOL_VERSION@"> - <description>Construct consensus reads from Illumina pair-end reads</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command> - - <![CDATA[ - illuminapairedend - - --score-min='$score' - -r '$inputfastq3p' - '$inputfastq5p' > '$output' - - ]]> - - </command> - - <inputs> - <param name="inputfastq3p" type="data" format="fastq" label="Read from file" help="file of 3p (1:) Illumina pair-end reads to assemble in sanger fastq nucleic format (standard fastq)" /> - <param name="inputfastq5p" type="data" format="fastq" label="Read from file" help="file of 5p (2:) Illumina pair-end reads to assemble in sanger fastq nucleic format (standard fastq)" /> - <param name="score" type="float" value="40.0" label="minimum score for keeping aligment"/> - </inputs> - <outputs> - <data format="fastq" name="output" label="${tool.name} on ${on_string}: assembly results" /> - </outputs> - - <tests> - <test> - <param name="inputfastq3p" value="wolf_small.F.fastq" /> - <param name="inputfastq5p" value="wolf_small.R.fastq" /> - <param name="score" value="40.0" /> - <output name="output" file="illuminapairedend.output.fastq" ftype="fastq"/> - </test> - </tests> - - <help><![CDATA[ - -.. class:: warning - -**Warning:** -Sequence records corresponding to the same read pair must be in the same order in the two files - --------- - -.. class:: infomark - -**What it does** - -illuminapairedend aims at aligning the two reads of a pair-end library sequenced using an Illumina platform : - -\* If the two reads overlap, it returns the consensus sequence together with its quality -\* Otherwise, it concatenates sequence merging the forward read and the reversed-complemented reverse read. - -The program uses as input one or two fastq sequences reads files. - -\* If two files are used one of them must be specified using the -r option. Sequence records corresponding to the same read pair must be in the same order in the two files. -\* If just one file is provided, sequence records are supposed to be all of the same length. The first half of th e sequence is used as forward read, the second half is used as the reverse read. - -illuminapairedend align the forward sequence record with the reverse complement of the reverse sequence record. The alignment algorithm takes into account the base qualities. - -@OBITOOLS_LINK@ - -]]> - </help> - <expand macro="citation" /> - -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/macros.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,248 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="requirements"> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">obitools</requirement> - </requirements> - </xml> -<<<<<<< HEAD - - <token name="@TOOL_VERSION@">1.2.13</token> - <token name="@PROFILE@">21.01</token> -======= - <xml name="bio_tools"> - <xrefs> - <xref type='bio.tools'>obitools</xref> - </xrefs> - </xml> - <token name="@TOOL_VERSION@">1.2.11</token> ->>>>>>> 7abad681f (add tools up until P) - - <xml name="stdio"> - <stdio> - <exit_code range="1:" level="fatal" description="Error in Obitools execution" /> - </stdio> - </xml> - - <token name="@INPUT_FORMATS@">fastqsanger,fastqsanger.gz,fastqsolexa,fastqsolexa.gz,fasta,fasta.gz</token> - <token name="@GUNZIP_INPUT@"><![CDATA[ - #if $input.ext.endswith(".gz") - gunzip -c '$input' > input && - #else - ln -s '$input' input && - #end if - ]]></token> - <token name="@GZIP_OUTPUT@"><![CDATA[ - #if $input.ext.endswith(".gz") - | gzip -c - #end if - ]]></token> - - <!-- generate galaxy.json to determine output format - - by default same as input - - if out_format can be specified - - fasta/fastq depending on $out_format (if != "") - - or fasta if the tool does not support choosing the output format, e.g. obiuniq which always outputs fasta - - gz if $input is zipped - - A list of outputs to include in the json can be given by - setting a variable $outputs = [...] listing pairs of output names and the actual output - (defaults to [("output", $output)]) - --> - <token name="@GENERATE_GALAXY_JSON@"><![CDATA[ - #import json - - #try: - #silent $outputs[0] - #except - #set outputs = [("output", $output)] - #end try - - #if $input.ext.startswith("fastq") - #set ext = "fastqsanger" - #else if $input.ext.startswith("fasta") - #set ext = "fasta" - #end if - - #try - #if $out_format == "fasta" - #set ext = "fasta" - #else if $out_format == "fastq" - #set ext = "fastqsanger" - #end if - #except - #set ext = "fasta" - #end try - - #if $input.ext.endswith(".gz") - #set ext = ext + ".gz" - #end if - - #set gxy_json = {} - #for oname, o in $outputs - #silent gxy_json[oname] = {"ext": ext} - #end for - && echo '${json.dumps(gxy_json)}' >> galaxy.json - ]]></token> - - <token name="@OUT_FORMAT@"><![CDATA[ - #if $out_format - --${out_format}-output - #end if - ]]></token> - - <xml name="out_format_macro"> - <param name="out_format" type="select" optional="true" label="Output data type" help="For FASTA/Q the the default output type is the same as the input type"> - <option value="fasta">fasta</option> - <option value="fastq">fastq</option> - </param> - </xml> - - <token name="@OBITOOLS_LINK@"> -<![CDATA[ --------- - -**Project links:** - -`OBITools`_ - -.. _OBITools: https://pythonhosted.org/OBITools/ -]]> - </token> - - <xml name="attributes"> - <option value="ali_dir" selected="true">ali_dir</option> - <option value="ali_length">ali_length</option> - <option value="avg_quality">avg_quality</option> - <option value="best_match">best_match</option> - <option value="best_identity">best_identity</option> - <option value="class">class</option> - <option value="cluster">cluster</option> - <option value="complemented">complemented</option> - <option value="count">count</option> - <option value="cut">cut</option> - <option value="direction">direction</option> - <option value="distance">distance</option> - <option value="error">error</option> - <option value="experiment">experiment</option> - <option value="family">family</option> - <option value="family_name">family_name</option> - <option value="forward_error">forward_error</option> - <option value="forward_match">forward_match</option> - <option value="forward_primer">forward_primer</option> - <option value="forward_score">forward_score</option> - <option value="forward_tag">forward_tag</option> - <option value="forward_tm">forward_tm</option> - <option value="genus">genus</option> - <option value="genus_name">genus_name</option> - <option value="head_quality">head_quality</option> - <option value="id_status">id_status</option> - <option value="merged_star">merged_star</option> - <option value="merged">merged</option> - <option value="mid_quality">mid_quality</option> - <option value="mode">mode</option> - <option value="obiclean_cluster">obiclean_cluster</option> - <option value="obiclean_count">obiclean_count</option> - <option value="obiclean_head">obiclean_head</option> - <option value="obiclean_headcount">obiclean_headcount</option> - <option value="obiclean_internalcount">obiclean_internalcount</option> - <option value="obiclean_samplecount">obiclean_samplecount</option> - <option value="obiclean_singletoncount">obiclean_singletoncount</option> - <option value="obiclean_status">obiclean_status</option> - <option value="occurrence">occurrence</option> - <option value="order">order</option> - <option value="order_name">order_name</option> - <option value="pairend_limit ">pairend_limit </option> - <option value="partial ">partial </option> - <option value="rank">rank</option> - <option value="reverse_error">reverse_error</option> - <option value="reverse_match">reverse_match</option> - <option value="reverse_primer">reverse_primer</option> - <option value="reverse_score">reverse_score</option> - <option value="reverse_tag">reverse_tag</option> - <option value="reverse_tm">reverse_tm</option> - <option value="sample">sample</option> - <option value="scientific_name">scientific_name</option> - <option value="score">score</option> - <option value="score_norm">score_norm</option> - <option value="select">select</option> - <option value="seq_ab_match">seq_ab_match</option> - <option value="seq_a_single">seq_a_single</option> - <option value="seq_a_mismatch">seq_a_mismatch</option> - <option value="seq_a_deletion">seq_a_deletion</option> - <option value="seq_a_insertion">seq_a_insertion</option> - <option value="seq_b_single">seq_b_single</option> - <option value="seq_b_mismatch">seq_b_mismatch</option> - <option value="seq_b_deletion">seq_b_deletion</option> - <option value="seq_b_insertion">seq_b_insertion</option> - <option value="seq_length">seq_length</option> - <option value="seq_length_ori">seq_length_ori</option> - <option value="seq_rank">seq_rank</option> - <option value="sminL">sminL</option> - <option value="sminR">sminR</option> - <option value="species">species</option> - <option value="species_list">species_list</option> - <option value="species_name">species_name</option> - <option value="status">status</option> - <option value="strand">strand</option> - <option value="tail_quality">tail_quality</option> - <option value="taxid">taxid</option> - </xml> - - <xml name="input_format_options_macro"> - <section name="input_format_options" title="Input format options" expanded="false"> - <param name="options_inputtype" type="select" optional="true" label="Specify the input datatype" help="default: determine automatically (should only be necessay for non FASTA/FASTQ datasets)"> - <option value="--genbank">genbank</option> - <option value="--embl">embl</option> - <option value="--sanger">sanger</option> - <option value="--solexa">solexa</option> - <option value="--ecopcr">ecopcr</option> - <option value="--ecopcrdb">ecopcrdb</option> - <option value="--fasta">fasta (including obitools fasta extentions)</option> - <option value="--raw-fasta">raw fasta (more tolerant format variant)</option> - </param> - <param name="options_seqtype" type="select" optional="true" label="Specify the sequence datatype" > - <option value="--nuc">nucleic</option> - <option value="--prot">protein</option> - </param> - </section> - </xml> - - <token name="@INPUT_FORMAT@"><![CDATA[ - #if $input_format_options.options_inputtype - $input_format_options.options_inputtype - #else - #if $input.ext.startswith("fasta") - --fasta - #else if $input.ext.startswith("fastqsolexa") - ## input file is in fastq nucleic format produced by solexa sequencer - --solexa - #else - ## input file is in sanger fastq nucleic format (standard fastq) - --sanger - #end if - #end if - #if $input_format_options.options_seqtype - $input_format_options.options_seqtype - #end if - ]]></token> - - <xml name="sanitizer"> - <sanitizer invalid_char="test"> - <valid initial="default"> - <!--add value="""/--> - </valid> - <mapping initial="default"> - <add source=">" target="\>"/> - <add source=""" target="\""/> - </mapping> - </sanitizer> - </xml> - - <xml name="citation"> - <citations> - <citation type="doi">10.1111/1755-0998.12428</citation> - </citations> - </xml> - -</macros>
--- a/ngsfilter.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,239 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_ngsfilter" name="NGSfilter" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - ngsfilter - - --without-progress-bar - -t '$inputextrafile' - #if $bool - -u unident - #end if - -e '$mismatch' - @INPUT_FORMAT@ - @OUT_FORMAT@ - input - @GZIP_OUTPUT@ - > '$output' - - #if $bool - #if $input.ext.endswith(".gz") - && gzip -c unident > '$unident' - #else - && mv unident '$unident' - #end if - #set outputs = [("output", $output), ("unident", $unident)] - #end if - @GENERATE_GALAXY_JSON@ - ]]></command> - <inputs> - <param name="inputextrafile" type="data" format="tabular" label="Parameter file" help="extrafile describing the DNA tags and primers sequences used for each sample" /> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Read from file" help="file of Illumina pair-end reads assembled by illuminapairedend" /> - <param name="mismatch" type="integer" value="2" label="Number of errors allowed for matching primers (default = 2)" /> - <param name="bool" type="boolean" checked="true" label="Do you want to generate a file with unidentified sequences?" /> - <expand macro="input_format_options_macro"/> - <expand macro="out_format_macro"/> - </inputs> - <outputs> - <data name="unident" format="auto" label="${tool.name} on ${on_string}: Unassigned sequences"> - <filter>bool is True</filter> - </data> - <data name="output" format="auto" label="${tool.name} on ${on_string}: Trimmed and annotated" /> - </outputs> - <tests> - <test> - <param name="inputextrafile" value="input_ngsfilter_extrafile.txt" /> - <param name="input" value="illuminapairedend.output.fastq" ftype="fastqsanger" /> - <param name="mismatch" value="2" /> - <param name="bool" value="False" /> - <output name="output" file="output_ngsfilter.fastq" ftype="fastqsanger"/> - </test> - <test> - <param name="inputextrafile" value="input_ngsfilter_extrafile.txt" /> - <param name="input" value="illuminapairedend.output.fastq.gz" ftype="fastqsanger.gz" /> - <param name="mismatch" value="3" /> - <param name="bool" value="True" /> - <output name="output" file="output_ngsfilter_error_3.fastq" ftype="fastqsanger.gz" decompress="true"/> - <output name="unident" file="output_ngsfilter_unidentified.fastq" ftype="fastqsanger.gz" decompress="true"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -A DNA metabarcoding experiment can be considered as a set a PCR products mixed together and sequenced using a next generation sequencer ({i.e.} a solexa or a 454). -To distinguish between this different PCR products, pairs of small DNA sequences (call tags, see the oligoTag command and its associated paper for more -informations on the design of such tags) unique for each PCR products are concatenated to the PCR primers. -As they are amplified during the PCR, these tags should be recognizable, together with their respective primers, at the beginning and the end of the reads. -The first step in data analysis is thus to demultiplex the large resulting sequence file by identifying these DNA tags and the primers. - -Usually the results of sequencing are stored in one or more files formatted according to the fasta or fastq format. ngsfilter take as input such sequence file and an extra file describing the DNA tags and primers sequences used for each sample. - -The results consist of sequences trimmed of the primers and tags and annotated with the corresponding sample (and possibly some extra informations). -Sequences for which the tags and primers have not been well identified, and which are thus unassigned to any sample, are tagged as erroneous sequences -by ngsfilter. Such erroneous sequences are not reported by the program unless specified by the appropriate option. - --------- - -**Help** -Extrafile format: - -a tab delimited tabular file with 8 columns. - -experiment_name sample_name oligo_tag(s) forward_primer reverse_primer F @ extra_information - -ex: - -laos_gh LA01E1a agcgacta:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01A; - -laos_gh LA01E1b tcagtgtc:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01B; - -laos_gh LA01E2a actctgct:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01C; - -laos_gh LA01E2b atatagcg:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01D; - -laos_gh LA01P1a ctatgcta:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01E; - -laos_gh LA01P1b tcgcgctg:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01F; - -laos_gh LA01P2a agcacagt:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01G; - -laos_gh LA01P2b tagctagt:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01H; - -laos_gh LA02E1a agcgacta:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02A; - -laos_gh LA02E1b tcagtgtc:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02B; - -laos_gh LA02E2a actctgct:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02C; - -@OBITOOLS_LINK@ - - -]]> - - </help> - <expand macro="citation" /> - -</tool> -======= -<tool id="obi_ngsfilter" name="NGSfilter" version="@TOOL_VERSION@"> - <description>Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command> - <![CDATA[ - - ngsfilter - -t '$inputextrafile' '$inputfastq' - #if $bool - -u '$unident' - #end if - -e '$mismatch' > '$output' - - ]]> - - </command> - - <inputs> - <param name="inputextrafile" type="data" format="tabular" label="Parameter file" help="extrafile describing the DNA tags and primers sequences used for each sample" /> - <param name="inputfastq" type="data" format="fastq" label="Read from file" help="file of Illumina pair-end reads assembled by illuminapairedend" /> - <param name="mismatch" type="integer" value="2" label="Number of errors allowed for matching primers (default = 2)" /> - <param name="bool" type="boolean" checked="true" label="Do you want to generate a file with unidentified sequences?" /> - </inputs> - <outputs> - <data format="fastq" name="unident" label="unidentified.fastq with ${tool.name} on ${on_string}"> - <filter>bool is True</filter> - </data> - <data format="fastq" name="output" label="${tool.name} on ${on_string}: Trimmed and annotated" /> - </outputs> - <tests> - <test> - <param name="inputextrafile" value="input_ngsfilter_extrafile.txt" /> - <param name="inputfastq" value="illuminapairedend.output.fastq" /> - <param name="mismatch" value="2" /> - <param name="bool" value="False" /> - <output name="output" file="output_ngsfilter.fastq" ftype="fastq"/> - </test> - <test> - <param name="inputextrafile" value="input_ngsfilter_extrafile.txt" /> - <param name="inputfastq" value="illuminapairedend.output.fastq" /> - <param name="mismatch" value="3" /> - <param name="bool" value="True" /> - <output name="output" file="output_ngsfilter_error_3.fastq" ftype="fastq"/> - <output name="unident" file="output_ngsfilter_unidentified.fastq" ftype="fastq"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -A DNA metabarcoding experiment can be considered as a set a PCR products mixed together and sequenced using a next generation sequencer ({i.e.} a solexa or a 454). -To distinguish between this different PCR products, pairs of small DNA sequences (call tags, see the oligoTag command and its associated paper for more -informations on the design of such tags) unique for each PCR products are concatenated to the PCR primers. -As they are amplified during the PCR, these tags should be recognizable, together with their respective primers, at the beginning and the end of the reads. -The first step in data analysis is thus to demultiplex the large resulting sequence file by identifying these DNA tags and the primers. - -Usually the results of sequencing are stored in one or more files formatted according to the fasta or fastq format. ngsfilter take as input such sequence file and an extra file describing the DNA tags and primers sequences used for each sample. - -The results consist of sequences trimmed of the primers and tags and annotated with the corresponding sample (and possibly some extra informations). -Sequences for which the tags and primers have not been well identified, and which are thus unassigned to any sample, are tagged as erroneous sequences -by ngsfilter. Such erroneous sequences are not reported by the program unless specified by the appropriate option. - --------- - -**Help** -Extrafile format: - -a tab delimited tabular file with 8 columns. - -experiment_name sample_name oligo_tag(s) forward_primer reverse_primer F @ extra_information - -ex: - -laos_gh LA01E1a agcgacta:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01A; - -laos_gh LA01E1b tcagtgtc:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01B; - -laos_gh LA01E2a actctgct:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01C; - -laos_gh LA01E2b atatagcg:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01D; - -laos_gh LA01P1a ctatgcta:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01E; - -laos_gh LA01P1b tcgcgctg:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01F; - -laos_gh LA01P2a agcacagt:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01G; - -laos_gh LA01P2b tagctagt:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01H; - -laos_gh LA02E1a agcgacta:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02A; - -laos_gh LA02E1b tcagtgtc:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02B; - -laos_gh LA02E2a actctgct:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02C; - -@OBITOOLS_LINK@ - - -]]> - - </help> - <expand macro="citation" /> - -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obiannotate.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,337 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_annotate" name="obiannotate" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>Adds/Edits sequence record annotations</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - obiannotate - --without-progress-bar - ${seqrank} - - #if $rename_tag.old_name and $rename_tag.new_name - -R '$rename_tag.old_name':'$rename_tag.new_name' - #end if - - #if $deletetag_key - --delete-tag='$deletetag_key' - #end if - - #if $set_tag.key and $set_tag.pythonexpression - -S '$set_tag.key':'$set_tag.pythonexpression' - #end if - - #if $taglist - --tag-list='$taglist' - #end if - - #if $setid_pythonexpression - --set-identifier='$setid_pythonexpression' - #end if - - #if $pythonexpression - --run='$pythonexpression' - #end if - - #if $setsequence_pythonexpression - --set-sequence='$setsequence_pythonexpression' - #end if - - #if $setdefinition_pythonexpression - --set-definition='$setdefinition_pythonexpression' - #end if - - #if $key_selector.key - --keep='$key_selector.key' - #end if - #if $key_selector.key2 - --keep='$key_selector.key2' - #end if - #if $key_selector.key3 - --keep='$key_selector.key3' - #end if - #if $key_selector.key4 - --keep='$key_selector.key4' - #end if - #if $key_selector.key5 - --keep='$key_selector.key5' - #end if - - ${length} - ${clearbool} - ${uniqid} - - #if $rankname - --with-taxon-at-rank='$rankname' - #end if - - #if $mclfile - --mcl='$mclfile' - #end if - @INPUT_FORMAT@ - @OUT_FORMAT@ - input - @GZIP_OUTPUT@ - > '$output' - - @GENERATE_GALAXY_JSON@ - ]]></command> - - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" /> - <param name="seqrank" type="boolean" label="Add a new attribute seq_rank" truevalue="--seq-rank" falsevalue="" help="Use this option if you want to add a new attribute named seq_rank to the sequence record indicating its entry number in the sequence file" /> - <section name="rename_tag" title="Rename attribute" expanded="False"> - <param name="old_name" type="text" label="old name" optional="true"/> - <param name="new_name" type="text" label="new name" optional="true"/> - </section> - <param name="deletetag_key" type="text" label="Delete attribute" optional="true" help="Use this option if you want to delete attribute named ATTRIBUTE_NAME.When this attribute is missing, the sequence record is skipped and the next one is examined"/> - - <section name="set_tag" title="Create a new attribute" expanded="False"> - <param name="key" type="text" label="key" optional="true"/> - <param name="pythonexpression" type="text" label="python expression" optional="true"/> - </section> - - <param name="taglist" type="data" optional="true" format="txt,tabular" label="Use a tag list" help="file containing identifiers of sequences to select" /> - - <param name="setid_pythonexpression" type="text" label="Set sequence record identifier with a value" help="Use this option if you want to set sequence record identifier with a value computed from PYTHON_EXPRESSION" /> - - <param name="pythonexpression" type="text" label="Run a PYTHON_EXPRESSION on each selected sequence" optional="true" help="Use this option if you want to run a PYTHON_EXPRESSION on each selected sequence"/> - - <param name="setsequence_pythonexpression" type="text" label="Change the sequence itself with a value" help="Use this option if you want to change the sequence itself with a value computed from PYTHON_EXPRESSION"/> - - <param name="setdefinition_pythonexpression" type="text" label="Set sequence definition with a value computed" help="Use this option if you want to set sequence definition with a value computed from PYTHON_EXPRESSION"/> - - <param name="clearbool" type="boolean" label="Clear all attributes associated to the sequence records" truevalue="--clear" falsevalue="" help="Use this option if you want to clear all attributes associated to the sequence records" /> - - <section name="key_selector" title="Keep only attribute with key" expanded="False"> - <param name="key" type="text" label="key" optional="true" /> - <param name="key2" type="text" label="if you want to specify a second key" optional="true" /> - <param name="key3" type="text" label="if you want to specify a third key" optional="true" /> - <param name="key4" type="text" label="if you want to specify a fourth key" optional="true" /> - <param name="key5" type="text" label="if you want to specify a fifth key" optional="true" /> - </section> - - <param name="length" type="boolean" label="Use the length option?" truevalue="--length" falsevalue="" help="Use this option if you want to add attribute with seq_length as a key and sequence length as a value" /> - - <param name="rankname" type="text" label="Add taxonomic annotation" help="Use this option if you want to add taxonomic annotation at taxonomic rank RANK_NAME"/> - - <param name="mclfile" optional="true" type="data" format="txt,tabular" label="mcl file" help="use this option if you want to add a new attribute containing the number of the cluster the sequence record was assigned to, as indicated in file MCLFILE" /> - - <param name="uniqid" type="boolean" label="Force sequence record ids to be unique" truevalue="--uniq-id" falsevalue="" help="Use this option if you want to force sequence record ids to be unique" /> - <expand macro="input_format_options_macro"/> - <expand macro="out_format_macro"/> - </inputs> - <outputs> - <data name="output" format="auto"/> - </outputs> - <tests> - <test> - <param name="input" value="output_obiuniq.fasta" ftype="fasta"/> - <section name="key_selector"> - <param name="key" value="count" /> - <param name="key2" value="merged_sample" /> - </section> - <param name="uniqid" value="true" /> - <param name="clearbool" value="true" /> - <param name="length" value="true" /> - <param name="seqrank" value="true" /> - - <output name="output" file="output_obiannotate.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_obiuniq.fasta.gz" ftype="fasta.gz"/> - <section name="key_selector"> - <param name="key" value="count" /> - <param name="key2" value="merged_sample" /> - </section> - <param name="uniqid" value="true" /> - <param name="clearbool" value="true" /> - <param name="length" value="true" /> - <param name="seqrank" value="true" /> - <output name="output" file="output_obiannotate.fasta.gz" ftype="fasta.gz" decompress="true"/> - </test> - <!-- TODO fastq input test --> - </tests> - - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -`obiannotate` is the command that allows adding/modifying/removing annotation attributes attached to sequence records. - -Once such attributes are added, they can be used by the other OBITools commands for filtering purposes or for statistics computing. - -@OBITOOLS_LINK@ - -]]> - - </help> - <expand macro="citation" /> -</tool> -======= -<tool id="obi_annotate" name="obiannotate" version="@TOOL_VERSION@"> - <description>Adds/Edits sequence record annotations</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command> - - <![CDATA[ - obiannotate - ${seqrank} - - #if $rename_tag.old_name and $rename_tag.new_name - -R '$rename_tag.old_name':'$rename_tag.new_name' - #end if - - #if $deletetag_key - --delete-tag='$deletetag_key' - #end if - - #if $set_tag.key and $set_tag.pythonexpression - -S '$set_tag.key':'$set_tag.pythonexpression' - #end if - - #if $taglist - --tag-list='$taglist' - #end if - - #if $setid_pythonexpression - --set-identifier='$setid_pythonexpression' - #end if - - #if $pythonexpression - --run='$pythonexpression' - #end if - - #if $setsequence_pythonexpression - --set-sequence='$setsequence_pythonexpression' - #end if - - #if $setdefinition_pythonexpression - --set-definition='$setdefinition_pythonexpression' - #end if - - #if $key_selector.key - --keep='$key_selector.key' - #end if - #if $key_selector.key2 - --keep='$key_selector.key2' - #end if - #if $key_selector.key3 - --keep='$key_selector.key3' - #end if - #if $key_selector.key4 - --keep='$key_selector.key4' - #end if - #if $key_selector.key5 - --keep='$key_selector.key5' - #end if - - ${length} - ${clearbool} - ${uniqid} - - #if $rankname - --with-taxon-at-rank='$rankname' - #end if - - #if $mclfile - --mcl='$mclfile' - #end if - - '$inputseq' > '$output' - ]]> - - </command> - - <inputs> - <param name="inputseq" type="data" format="fastq,fasta" label="Input sequences file" /> - <param name="seqrank" type="boolean" label="Add a new attribute seq_rank" truevalue="--seq-rank" falsevalue="" help="Use this option if you want to add a new attribute named seq_rank to the sequence record indicating its entry number in the sequence file" /> - <section name="rename_tag" title="Rename attribute" expanded="False"> - <param name="old_name" type="text" label="old name" optional="true"/> - <param name="new_name" type="text" label="new name" optional="true"/> - </section> - <param name="deletetag_key" type="text" label="Delete attribute" optional="true" help="Use this option if you want to delete attribute named ATTRIBUTE_NAME.When this attribute is missing, the sequence record is skipped and the next one is examined"/> - - <section name="set_tag" title="Create a new attribute" expanded="False"> - <param name="key" type="text" label="key" optional="true"/> - <param name="pythonexpression" type="text" label="python expression" optional="true"/> - </section> - - <param name="taglist" type="data" optional="true" format="txt,tabular" label="Use a tag list" help="file containing identifiers of sequences to select" /> - - <param name="setid_pythonexpression" type="text" label="Set sequence record identifier with a value" help="Use this option if you want to set sequence record identifier with a value computed from PYTHON_EXPRESSION" /> - - <param name="pythonexpression" type="text" label="Run a PYTHON_EXPRESSION on each selected sequence" optional="true" help="Use this option if you want to run a PYTHON_EXPRESSION on each selected sequence"/> - - <param name="setsequence_pythonexpression" type="text" label="Change the sequence itself with a value" help="Use this option if you want to change the sequence itself with a value computed from PYTHON_EXPRESSION"/> - - <param name="setdefinition_pythonexpression" type="text" label="Set sequence definition with a value computed" help="Use this option if you want to set sequence definition with a value computed from PYTHON_EXPRESSION"/> - - <param name="clearbool" type="boolean" label="Clear all attributes associated to the sequence records" truevalue="--clear" falsevalue="" help="Use this option if you want to clear all attributes associated to the sequence records" /> - - <section name="key_selector" title="Keep only attribute with key" expanded="False"> - <param name="key" type="text" label="key" optional="true" /> - <param name="key2" type="text" label="if you want to specify a second key" optional="true" /> - <param name="key3" type="text" label="if you want to specify a third key" optional="true" /> - <param name="key4" type="text" label="if you want to specify a fourth key" optional="true" /> - <param name="key5" type="text" label="if you want to specify a fifth key" optional="true" /> - </section> - - <param name="length" type="boolean" label="Use the length option?" truevalue="--length" falsevalue="" help="Use this option if you want to add attribute with seq_length as a key and sequence length as a value" /> - - <param name="rankname" type="text" label="Add taxonomic annotation" help="Use this option if you want to add taxonomic annotation at taxonomic rank RANK_NAME"/> - - <param name="mclfile" optional="true" type="data" format="txt,tabular" label="mcl file" help="use this option if you want to add a new attribute containing the number of the cluster the sequence record was assigned to, as indicated in file MCLFILE" /> - - <param name="uniqid" type="boolean" label="Force sequence record ids to be unique" truevalue="--uniq-id" falsevalue="" help="Use this option if you want to force sequence record ids to be unique" /> - - </inputs> - <outputs> - <data format="fasta" name="output" label="output.fasta with ${tool.name} on ${on_string}" /> - </outputs> - - <tests> - <test> - <param name="inputseq" value="output_obiuniq.fasta" /> - <section name="key_selector"> - <param name="key" value="count" /> - <param name="key2" value="merged_sample" /> - </section> - <param name="uniqid" value="true" /> - <param name="clearbool" value="true" /> - <param name="length" value="true" /> - <param name="seqrank" value="true" /> - - <output name="output" file="output_obiannotate.fasta" ftype="fasta"/> - </test> - </tests> - - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -`obiannotate` is the command that allows adding/modifying/removing annotation attributes attached to sequence records. - -Once such attributes are added, they can be used by the other OBITools commands for filtering purposes or for statistics computing. - -@OBITOOLS_LINK@ - -]]> - - </help> - <expand macro="citation" /> -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obiclean.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,156 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_clean" name="obiclean" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>tags a set of sequences for PCR/sequencing errors identification</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command><![CDATA[ - @GUNZIP_INPUT@ - obiclean - --without-progress-bar - #if $distance - -d '$distance' - #end if - #if $key - -s '$key' - #end if - #if $ratio - -r '$ratio' - #end if - ${head} - @INPUT_FORMAT@ - @OUT_FORMAT@ - input - @GZIP_OUTPUT@ - >'$output' - @GENERATE_GALAXY_JSON@ - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" /> - <param name="distance" type="integer" value="1" optional="true" label="Maximum numbers of differences between two variant sequences (default: 1)" /> - <param name="key" type="text" optional="true" label="Specify an attribute containing sample definition" /> - <param name="ratio" optional="true" type="float" value="1" label="Threshold ratio between counts (rare/abundant counts) of two sequence records so that the less abundant one is a variant of the more abundant (default: 1, i.e. all less abundant sequences are variants)" /> - <param name="head" type="boolean" checked="false" truevalue="-H" falsevalue="" label="Do you want to select only sequences with the head status in a least one sample?" /> - <expand macro="input_format_options_macro"/> - <expand macro="out_format_macro"/> - </inputs> - <outputs> - <data format="auto" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="output_obiannotate.fasta" /> - <param name="head" value="True"/> - <output name="output" file="output_obiclean_simple.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_obiannotate.fasta.gz" /> - <param name="key" value="merged_sample"/> - <param name="ratio" value="0.05"/> - <param name="head" value="False"/> - <output name="output" file="output_obiclean_advanced.fasta.gz" ftype="fasta.gz" decompress="true"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -obijoinpairedend aims at joining the two reads of a paired-end library. - -For this purpose, it concatenates sequence merging the forward read and the reversed-complemented reverse read. - -The program uses as input one or two sequences reads files. - -If two files are used one of them must be specified using the -r option. Sequence records corresponding to the same read pair must be in the same order in the two files. -If just one file is provided, sequence records are supposed to be all of the same length. The first half of the sequence is used as forward read, the second half is used as the reverse read. - -@OBITOOLS_LINK@ - - ]]> - </help> - <expand macro="citation" /> - </tool> -======= -<tool id="obi_clean" name="obiclean" version="@TOOL_VERSION@"> - <description>tags a set of sequences for PCR/sequencing errors identification</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command> - - <![CDATA[ - obiclean - - #if $distance - -d '$distance' - #end if - #if $key - -s '$key' - #end if - #if $ratio - -r '$ratio' - #end if - ${head} - - '$input'>'$output' - - - ]]> - - </command> - - <inputs> - <param name="input" type="data" format="fasta" label="Input sequences file" /> - <param name="distance" type="integer" value="1" optional="true" label="Maximum numbers of differences between two variant sequences (default: 1)" /> - <param name="key" type="text" optional="true" label="Specify an attribute containing sample definition" /> - <param name="ratio" optional="true" type="float" value="1" label="Threshold ratio between counts (rare/abundant counts) of two sequence records so that the less abundant one is a variant of the more abundant (default: 1, i.e. all less abundant sequences are variants)" /> - <param name="head" type="boolean" checked="false" truevalue="-H" falsevalue="" label="Do you want to select only sequences with the head status in a least one sample?" /> - </inputs> - <outputs> - <data format="fasta" name="output" label="output with ${tool.name} on ${on_string}" /> - </outputs> - <tests> - <test> - <param name="input" value="output_obiannotate.fasta" /> - <param name="head" value="True"/> - <output name="output" file="output_obiclean_simple.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_obiannotate.fasta" /> - <param name="key" value="merged_sample"/> - <param name="ratio" value="0.05"/> - <param name="head" value="False"/> - <output name="output" file="output_obiclean_advanced.fasta" ftype="fasta"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -obijoinpairedend aims at joining the two reads of a paired-end library. - -For this purpose, it concatenates sequence merging the forward read and the reversed-complemented reverse read. - -The program uses as input one or two sequences reads files. - -If two files are used one of them must be specified using the -r option. Sequence records corresponding to the same read pair must be in the same order in the two files. -If just one file is provided, sequence records are supposed to be all of the same length. The first half of the sequence is used as forward read, the second half is used as the reverse read. - -@OBITOOLS_LINK@ - - ]]> - </help> - <expand macro="citation" /> - </tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obiconvert.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,215 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_convert" name="obiconvert" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>converts sequence files to different output formats</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - obiconvert - #if $db - -d '$db' - #end if - #if $tax - -t '$tax' - #end if - @INPUT_FORMAT@ - @OUT_FORMAT@ - ${uppercase} - ${ecopcrdb} - #if str( $ecopcrdb) == "--ecopcrdb" - --ecopcrdb-output=${ecopcrdb_output} - #end if - input - @GZIP_OUTPUT@ - > '${output}' - @GENERATE_GALAXY_JSON@ - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@,txt,tabular" label="Input sequences file" help="database used for the in silico PCR. The database must be in the ecoPCR format (for example output of obiconvert)" /> - <param name="db" type="data" optional="true" format="txt,tabular" label="ecoPCR taxonomy database" /> - <param name="tax" type="data" optional="true" format="txt,tabular" label="NCBI taxonomy dump repository"/> - <expand macro="input_format_options_macro"/> - <expand macro="out_format_macro"/> - <param name="ecopcrdb" type="boolean" truevalue="--ecopcrdb" falsevalue="" label="Do you want to create an ecoPCR database from sequence records results?" help="Use this option if you want to generate an ecoPCR database output file" /> - <param name="uppercase" type="boolean" truevalue="--uppercase" falsevalue="" label="Do you want to print sequences in upper case?" /> - </inputs> - <outputs> - <data format="txt" name="ecopcrdb_output" label="${tool.name} on ${on_string}: ecopcrdb"> - <filter>ecopcrdb == True</filter> - </data> - <data format="auto" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="output_obisort.fasta" /> - <param name="options_inputtype" value="--fasta"/> - <param name="options_seqtype" value="--nuc"/> - <param name="out_format" value="fasta"/> - <param name="ecopcrdb" value="false"/> - <param name="uppercase" value="true"/> - <output name="output" file="output_obiconvert.fasta" ftype="fasta"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -obiconvert converts sequence files to different output formats. See the documentation for more details on the different formats. - -Input files can be in : - -fasta format - -extended OBITools fasta format - -Sanger fastq format - -Solexa fastq format - -ecoPCR format - -ecoPCR database format - -GenBank format - -EMBL format - -obiconvert converts those files to the : - -extended OBITools fasta format - -Sanger fastq format - -ecoPCR database format - -If no file name is specified, data is read from standard input. - -@OBITOOLS_LINK@ - - ]]> - - </help> - <expand macro="citation" /> -</tool> -======= -<tool id="obi_convert" name="obiconvert" version="@TOOL_VERSION@"> - <description>converts sequence files to different output formats</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command> - <![CDATA[ - obiconvert - #if $db - -d '$db' - #end if - #if $tax - -t '$tax' - #end if - ${options_inputtype} - ${options_seqtype} - --${out_format}-output - ${uppercase} - ${ecopcrdb} - #if str( $ecopcrdb) == "--ecopcrdb" - --ecopcrdb-output=${ecopcrdb_output} - #end if - '${input}' > '${output}' - ]]> - - </command> - - <inputs> - <param name="input" type="data" format="fastq,fasta,txt,tabular" label="Input sequences file" help="database used for the in silico PCR. The database must be in the ecoPCR format (for example output of obiconvert)" /> - <param name="db" type="data" optional="true" format="txt,tabular" label="ecoPCR taxonomy database" /> - <param name="tax" type="data" optional="true" format="txt,tabular" label="NCBI taxonomy dump repository"/> - <param name="options_inputtype" type="select" label="Specify the input datatype"> - <expand macro="inputtype"/> - </param> - <param name="options_seqtype" type="select" label="Specify the sequence datatype" > - <option value="--nuc" selected="true">nucleic</option> - <option value="--prot">protein</option> - </param> - <param name="out_format" type="select" label="Output data type"> - <option value="fasta" selected="true">fasta</option> - <option value="fastq">fastq</option> - </param> - <param name="ecopcrdb" type="boolean" truevalue="--ecopcrdb" falsevalue="" label="Do you want to create an ecoPCR database from sequence records results?" help="Use this option if you want to generate an ecoPCR database output file" /> - <param name="uppercase" type="boolean" truevalue="--uppercase" falsevalue="" label="Do you want to print sequences in upper case?" /> - - </inputs> - <outputs> - <data format="txt" name="ecopcrdb_output" label="result.ecopcrdb with ${tool.name} on ${on_string}"> - <filter>ecopcrdb == True</filter> - </data> - <data format="fasta" name="output" label="output with ${tool.name} on ${on_string}" > - <change_format> - <when input="out_format" value="fastq" format="fastq" /> - </change_format> - </data> - </outputs> - <tests> - <test> - <param name="input" value="output_obisort.fasta" /> - <param name="options_inputtype" value="fasta"/> - <param name="options_seqtype" value="--nuc"/> - <param name="out_format" value="fasta"/> - <param name="ecopcrdb" value="False"/> - <param name="uppercase" value="True"/> - <output name="output" file="output_obiconvert.fasta" ftype="fasta"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -obiconvert converts sequence files to different output formats. See the documentation for more details on the different formats. - -Input files can be in : - -fasta format - -extended OBITools fasta format - -Sanger fastq format - -Solexa fastq format - -ecoPCR format - -ecoPCR database format - -GenBank format - -EMBL format - -obiconvert converts those files to the : - -extended OBITools fasta format - -Sanger fastq format - -ecoPCR database format - -If no file name is specified, data is read from standard input. - -@OBITOOLS_LINK@ - - ]]> - - </help> - <expand macro="citation" /> -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obigrep.xml Wed Sep 01 07:54:50 2021 +0000 +++ b/obigrep.xml Thu Oct 30 16:01:34 2025 +0000 @@ -1,9 +1,9 @@ <tool id="obi_grep" name="obigrep" version="@TOOL_VERSION@" profile="@PROFILE@"> <description>Filters sequence file</description> - <expand macro="bio_tools"/> <macros> <import>macros.xml</import> </macros> + <expand macro="bio_tools"/> <expand macro="requirements"/> <expand macro="stdio"/> <command><![CDATA[ @@ -39,9 +39,9 @@ @GENERATE_GALAXY_JSON@ ]]></command> <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" /> + <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file"/> <conditional name="options_grep"> - <param name="options_grep_selector" type="select" label="Choose the sequence record selection option" > + <param name="options_grep_selector" type="select" label="Choose the sequence record selection option"> <option value="sequence" selected="true">sequence</option> <option value="definition">definition</option> <option value="identifier">identifier</option> @@ -53,47 +53,46 @@ <option value="lmin">lmin</option> </param> <when value="sequence"> - <param name="sequence" type="text" label="Regular expression pattern to be tested against the sequence itself. The pattern is case insensitive." > - <expand macro="sanitizer" /> + <param name="sequence" type="text" label="Regular expression pattern to be tested against the sequence itself. The pattern is case insensitive."> + <expand macro="sanitizer"/> </param> </when> <when value="definition"> - <param name="definition" type="text" label="Regular expression pattern to be tested against the definition of the sequence record. The pattern is case sensitive." > - <expand macro="sanitizer" /> + <param name="definition" type="text" label="Regular expression pattern to be tested against the definition of the sequence record. The pattern is case sensitive."> + <expand macro="sanitizer"/> </param> </when> <when value="identifier"> <param name="identifier" type="text" label="Regular expression pattern to be tested against the identifier of the sequence record. The pattern is case sensitive." > - <expand macro="sanitizer" /> + <expand macro="sanitizer"/> </param> </when> <when value="idlist"> - <param name="idlist" type="data" format="txt,tabular" label="points to a text file containing the list of sequence record identifiers to be selected. The file format consists in a single identifier per line." /> + <param name="idlist" type="data" format="txt,tabular" label="points to a text file containing the list of sequence record identifiers to be selected. The file format consists in a single identifier per line."/> </when> <when value="attribute"> - <param name="attribute" type="text" label="Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. The pattern is case sensitive." > - <expand macro="sanitizer" /> + <param name="attribute" type="text" label="Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. The pattern is case sensitive."> + <expand macro="sanitizer"/> </param> </when> <when value="hasattribute"> - <param name="hasattribute" type="text" label="Selects sequence records having an attribute who is key." > + <param name="hasattribute" type="text" label="Selects sequence records having an attribute who is key."> <expand macro="sanitizer" /> </param> </when> <when value="predicat"> - <param name="predicat" type="text" label="Python boolean expression to be evaluated for each sequence record." help="The attribute keys defined for each sequence record can be used in the expression as variable names. An extra variable named sequenceefers to the sequence record itself." > - <expand macro="sanitizer" /> + <param name="predicat" type="text" label="Python boolean expression to be evaluated for each sequence record." help="The attribute keys defined for each sequence record can be used in the expression as variable names. An extra variable named sequenceefers to the sequence record itself."> + <expand macro="sanitizer"/> </param> </when> <when value="lmax"> - <param name="lmax" type="text" label="lmax" help="Keeps sequence records whose sequence length is equal or shorter than lmax" /> + <param name="lmax" type="text" label="lmax" help="Keeps sequence records whose sequence length is equal or shorter than lmax"/> </when> <when value="lmin"> <param name="lmin" type="text" label="lmin" help="Keeps sequence records whose sequence length is equal or longer than lmin"/> </when> </conditional> - - <param name="v" type="boolean" truevalue="-v" falsevalue="" checked="false" label="Invert the sequence record selection (option -v)" /> + <param name="v" type="boolean" truevalue="-v" falsevalue="" checked="false" label="Invert the sequence record selection (option -v)"/> <expand macro="input_format_options_macro"/> <expand macro="out_format_macro"/> </inputs> @@ -101,25 +100,36 @@ <data format="auto" name="output"/> </outputs> <tests> - <test> - <param name="input" value="illuminapairedend.output.fastq" /> - <conditional name="options_grep"> - <param name="options_grep_selector" value="lmin"/> - <param name="lmin" value="80"/> - </conditional> - <param name="v" value="true" /> - <param name="out_format" value="fastq" /> - <output name="output" file="output_obigrep_lmin.fastq" ftype="fastqsanger"/> - </test> - <test> - <param name="input" value="illuminapairedend.output.fastq.gz" ftype="fastq.gz" /> - <conditional name="options_grep"> - <param name="options_grep_selector" value="predicat"/> - <param name="predicat" value='mode!="joined"'/> - </conditional> - <param name="v" value="false" /> - <param name="out_format" value="fasta" /> - <output name="output" file="output_obigrep_predicat.fasta.gz" ftype="fasta.gz" decompress="true"/> + <test expect_num_outputs="1"> + <!-- since the test data has large qualities it is sniffed as generic fastq which + is not accepted py the parameter so the ftype is set, in this test we + overwrite tis with the options_inputtype parameter and check if its set + correctly on the CLI--> + <param name="input" value="illuminapairedend.output.fastq" ftype="fastqsolexa"/> + <conditional name="options_grep"> + <param name="options_grep_selector" value="lmin"/> + <param name="lmin" value="80"/> + </conditional> + <param name="v" value="true" /> + <param name="options_inputtype" value="--sanger"/> + <param name="out_format" value="fastq" /> + <output name="output" file="output_obigrep_lmin.fastq" ftype="fastqsanger"/> + <assert_command> + <has_text text="--sanger"/> + </assert_command> + </test> + <test expect_num_outputs="1"> + <param name="input" value="illuminapairedend.output.fastq.gz" ftype="fastqsolexa.gz"/> + <conditional name="options_grep"> + <param name="options_grep_selector" value="predicat"/> + <param name="predicat" value='mode!="joined"'/> + </conditional> + <param name="v" value="false" /> + <param name="out_format" value="fasta" /> + <output name="output" file="output_obigrep_predicat.fasta.gz" ftype="fasta.gz" decompress="true"/> + <assert_command> + <has_text text="--solexa"/> + </assert_command> </test> </tests> <help><![CDATA[
--- a/obigrep.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,337 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_grep" name="obigrep" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>Filters sequence file</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - obigrep - --without-progress-bar - ${v} - #if str($options_grep.options_grep_selector) == 'sequence' - -s ${options_grep.sequence} - #else if str($options_grep.options_grep_selector) == 'definition' - -D ${options_grep.definition} - #else if str($options_grep.options_grep_selector) == 'identifier' - -I ${options_grep.identifier} - #else if str($options_grep.options_grep_selector) == 'idlist' - --id-list '${options_grep.idlist}' - #else if str($options_grep.options_grep_selector) == 'attribute' - -a ${options_grep.attribute} - #else if str($options_grep.options_grep_selector) == 'hasattribute' - -A ${options_grep.attribute} - #else if str($options_grep.options_grep_selector) == 'predicat' - -p ${options_grep.predicat} - #else if str($options_grep.options_grep_selector) == 'lmax' - -L ${options_grep.lmax} - #else if str($options_grep.options_grep_selector) == 'lmin' - -l ${options_grep.lmin} - #end if - @INPUT_FORMAT@ - @OUT_FORMAT@ - input - @GZIP_OUTPUT@ - > '$output' - @GENERATE_GALAXY_JSON@ - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" /> - <conditional name="options_grep"> - <param name="options_grep_selector" type="select" label="Choose the sequence record selection option" > - <option value="sequence" selected="true">sequence</option> - <option value="definition">definition</option> - <option value="identifier">identifier</option> - <option value="idlist">idlist</option> - <option value="attribute">attribute</option> - <option value="hasattribute">hasattribute</option> - <option value="predicat">predicat</option> - <option value="lmax">lmax</option> - <option value="lmin">lmin</option> - </param> - <when value="sequence"> - <param name="sequence" type="text" label="Regular expression pattern to be tested against the sequence itself. The pattern is case insensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="definition"> - <param name="definition" type="text" label="Regular expression pattern to be tested against the definition of the sequence record. The pattern is case sensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="identifier"> - <param name="identifier" type="text" label="Regular expression pattern to be tested against the identifier of the sequence record. The pattern is case sensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="idlist"> - <param name="idlist" type="data" format="txt,tabular" label="points to a text file containing the list of sequence record identifiers to be selected. The file format consists in a single identifier per line." /> - </when> - <when value="attribute"> - <param name="attribute" type="text" label="Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. The pattern is case sensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="hasattribute"> - <param name="hasattribute" type="text" label="Selects sequence records having an attribute who is key." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="predicat"> - <param name="predicat" type="text" label="Python boolean expression to be evaluated for each sequence record." help="The attribute keys defined for each sequence record can be used in the expression as variable names. An extra variable named sequenceefers to the sequence record itself." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="lmax"> - <param name="lmax" type="text" label="lmax" help="Keeps sequence records whose sequence length is equal or shorter than lmax" /> - </when> - <when value="lmin"> - <param name="lmin" type="text" label="lmin" help="Keeps sequence records whose sequence length is equal or longer than lmin"/> - </when> - </conditional> - - <param name="v" type="boolean" truevalue="-v" falsevalue="" checked="false" label="Invert the sequence record selection (option -v)" /> - <expand macro="input_format_options_macro"/> - <expand macro="out_format_macro"/> - </inputs> - <outputs> - <data format="auto" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="illuminapairedend.output.fastq" /> - <conditional name="options_grep"> - <param name="options_grep_selector" value="lmin"/> - <param name="lmin" value="80"/> - </conditional> - <param name="v" value="true" /> - <param name="out_format" value="fastq" /> - <output name="output" file="output_obigrep_lmin.fastq" ftype="fastqsanger"/> - </test> - <test> - <param name="input" value="illuminapairedend.output.fastq.gz" ftype="fastq.gz" /> - <conditional name="options_grep"> - <param name="options_grep_selector" value="predicat"/> - <param name="predicat" value='mode!="joined"'/> - </conditional> - <param name="v" value="false" /> - <param name="out_format" value="fasta" /> - <output name="output" file="output_obigrep_predicat.fasta.gz" ftype="fasta.gz" decompress="true"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -The obigrep command is in some way analog to the standard Unix grep command. It selects a subset of sequence records from a sequence file. - -A sequence record is a complex object composed of an identifier, a set of attributes (key=value), a definition, and the sequence itself. - -Instead of working text line by text line as the standard Unix tool, selection is done sequence record by sequence record. A large set of options allows refining selection on any of the sequence record elements. - -Moreover obigrep allows specifying simultaneously several conditions (that take the value TRUE or FALSE) and only the sequence records that fulfill all the conditions (all conditions are TRUE) are selected. - -Sequence record selection options : -* sequence : Regular expression pattern to be tested against the sequence itself. ex: GAATTC - -* definition : Regular expression pattern to be tested against the definition of the sequence record. ex: [Cc]hloroplast - -* identifier : Regular expression pattern to be tested against the identifier of the sequence record. ex: ^GH - -* idlist : points to a text file containing the list of sequence record identifiers to be selected. - -* attribute : Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. ex:'family_name:Asteraceae' - -* hasattribute : Selects sequence records having an attribute whose key = KEY. - -* predicat : Python boolean expression to be evaluated for each sequence record. The attribute keys defined for each sequence record can be used in the expression as variable names. An extra variable named ‘sequence’ refers to the sequence record itself. ex: mode!="joined" - -* lmax : Keeps sequence records whose sequence length is equal or shorter than lmax. ex : 100 - -* lmin : Selects sequence records whose sequence length is equal or longer than lmin. ex : 100 - -@OBITOOLS_LINK@ - -]]> - - </help> - <expand macro="citation" /> - -</tool> -======= -<tool id="obi_grep" name="obigrep" version="@TOOL_VERSION@"> - <description>Filters sequence file</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command> - - <![CDATA[ - obigrep - ${v} - #if str($options_grep.options_grep_selector) == 'sequence' - -s ${options_grep.sequence} - #else if str($options_grep.options_grep_selector) == 'definition' - -D ${options_grep.definition} - #else if str($options_grep.options_grep_selector) == 'identifier' - -I ${options_grep.identifier} - #else if str($options_grep.options_grep_selector) == 'idlist' - --id-list '$options_grep.idlist} - #else if str($options_grep.options_grep_selector) == 'attribute' - -a ${options_grep.attribute} - #else if str($options_grep.options_grep_selector) == 'hasattribute' - -A ${options_grep.attribute} - #else if str($options_grep.options_grep_selector) == 'predicat' - -p ${options_grep.predicat} - #else if str($options_grep.options_grep_selector) == 'lmax' - -L ${options_grep.lmax} - #else if str($options_grep.options_grep_selector) == 'lmin' - -l ${options_grep.lmin} - #end if - '$input' > '$output' - ]]> - - </command> - - <inputs> - <param name="input" type="data" format="fasta,fastq" label="Input sequences file" /> - <conditional name="options_grep"> - <param name="options_grep_selector" type="select" label="Choose the sequence record selection option" > - <option value="sequence" selected="true">sequence</option> - <option value="definition">definition</option> - <option value="identifier">identifier</option> - <option value="idlist">idlist</option> - <option value="attribute">attribute</option> - <option value="hasattribute">hasattribute</option> - <option value="predicat">predicat</option> - <option value="lmax">lmax</option> - <option value="lmin">lmin</option> - </param> - <when value="sequence"> - <param name="sequence" type="text" label="Regular expression pattern to be tested against the sequence itself. The pattern is case insensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="definition"> - <param name="definition" type="text" label="Regular expression pattern to be tested against the definition of the sequence record. The pattern is case sensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="identifier"> - <param name="identifier" type="text" label="Regular expression pattern to be tested against the identifier of the sequence record. The pattern is case sensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="idlist"> - <param name="idlist" type="data" format="txt,tabular" label="points to a text file containing the list of sequence record identifiers to be selected. The file format consists in a single identifier per line." /> - </when> - <when value="attribute"> - <param name="attribute" type="text" label="Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. The pattern is case sensitive." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="hasattribute"> - <param name="hasattribute" type="text" label="Selects sequence records having an attribute who is key." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="predicat"> - <param name="predicat" type="text" label="Python boolean expression to be evaluated for each sequence record." help="The attribute keys defined for each sequence record can be used in the expression as variable names. An extra variable named sequenceefers to the sequence record itself." > - <expand macro="sanitizer" /> - </param> - </when> - <when value="lmax"> - <param name="lmax" type="text" label="lmax" help="Keeps sequence records whose sequence length is equal or shorter than lmax" /> - </when> - <when value="lmin"> - <param name="lmin" type="text" label="lmin" help="Keeps sequence records whose sequence length is equal or longer than lmin"/> - </when> - </conditional> - - <param name="v" type="boolean" truevalue="-v" falsevalue="" checked="false" label="Invert the sequence record selection (option -v)" /> - <param name="out_format" type="select" label="Output data type"> - <option value="fasta">fasta</option> - <option value="fastq">fastq</option> - </param> - - </inputs> - <outputs> - <data format="fastq" name="output" label="output with ${tool.name} on ${on_string}" > - <change_format> - <when input="out_format" value="fasta" format="fasta" /> - </change_format> - </data> - </outputs> - <tests> - <test> - <param name="input" value="illuminapairedend.output.fastq" /> - <conditional name="options_grep"> - <param name="options_grep_selector" value="lmin"/> - <param name="lmin" value="80"/> - </conditional> - <param name="v" value="true" /> - <param name="out_format" value="fastq" /> - <output name="output" file="output_obigrep_lmin.fastq" ftype="fastq"/> - </test> - <test> - <param name="input" value="illuminapairedend.output.fastq" /> - <conditional name="options_grep"> - <param name="options_grep_selector" value="predicat"/> - <param name="predicat" value='mode!="joined"'/> - </conditional> - <param name="v" value="false" /> - <param name="out_format" value="fasta" /> - <output name="output" file="output_obigrep_predicat.fasta" ftype="fasta"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -The obigrep command is in some way analog to the standard Unix grep command. It selects a subset of sequence records from a sequence file. - -A sequence record is a complex object composed of an identifier, a set of attributes (key=value), a definition, and the sequence itself. - -Instead of working text line by text line as the standard Unix tool, selection is done sequence record by sequence record. A large set of options allows refining selection on any of the sequence record elements. - -Moreover obigrep allows specifying simultaneously several conditions (that take the value TRUE or FALSE) and only the sequence records that fulfill all the conditions (all conditions are TRUE) are selected. - -Sequence record selection options : -* sequence : Regular expression pattern to be tested against the sequence itself. ex: GAATTC - -* definition : Regular expression pattern to be tested against the definition of the sequence record. ex: [Cc]hloroplast - -* identifier : Regular expression pattern to be tested against the identifier of the sequence record. ex: ^GH - -* idlist : points to a text file containing the list of sequence record identifiers to be selected. - -* attribute : Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. ex:'family_name:Asteraceae' - -* hasattribute : Selects sequence records having an attribute whose key = KEY. - -* predicat : Python boolean expression to be evaluated for each sequence record. The attribute keys defined for each sequence record can be used in the expression as variable names. An extra variable named ‘sequence’ refers to the sequence record itself. ex: mode!="joined" - -* lmax : Keeps sequence records whose sequence length is equal or shorter than lmax. ex : 100 - -* lmin : Selects sequence records whose sequence length is equal or longer than lmin. ex : 100 - -@OBITOOLS_LINK@ - -]]> - - </help> - <expand macro="citation" /> - -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obisort.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,133 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_sort" name="obisort" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>sorts sequence records according to the value of a given attribute</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command><![CDATA[ - @GUNZIP_INPUT@ - obisort - --without-progress-bar - -k '$key' - ${reverse} - @INPUT_FORMAT@ - @OUT_FORMAT@ - input - @GZIP_OUTPUT@ - > '$output' - @GENERATE_GALAXY_JSON@ - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@,txt,tabular" label="Input sequences file" /> - <param name="key" type="text" label="key"/> - <param name="reverse" type="boolean" checked="false" truevalue="-r" falsevalue="" label="sorts in reverse order?" /> - <expand macro="input_format_options_macro"/> - <expand macro="out_format_macro"/> - </inputs> - <outputs> - <data format="auto" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="output_obiclean_advanced.fasta" /> - <param name="key" value="count"/> - <param name="reverse" value="False"/> - <param name="out_format" value="fasta"/> - <output name="output" file="output_obisort.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_obiclean_advanced.fasta" /> - <param name="key" value="count"/> - <param name="reverse" value="True"/> - <param name="out_format" value="fastq"/> - <output name="output" file="output_obisort.fastq" ftype="fastqsanger"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -obisort sorts sequence records according to the value of a given attribute, which can be either numeric or alphanumeric. - -@OBITOOLS_LINK@ - - ]]> - - </help> - <expand macro="citation" /> -</tool> -======= -<tool id="obi_sort" name="obisort" version="@TOOL_VERSION@"> - <description>sorts sequence records according to the value of a given attribute, which can be either numeric or alphanumeric</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command> - - <![CDATA[ - obisort - -k '$key' - ${reverse} - '$input' > '$output' - ]]> - - </command> - - <inputs> - <param name="input" type="data" format="fastq,fasta,txt,tabular" label="Input sequences file" /> - <param name="key" type="text" label="key" /> - <param name="reverse" type="boolean" checked="false" truevalue="-r" falsevalue="" label="sorts in reverse order?" /> - <param name="out_format" type="select" label="Output data type"> - <option value="fasta">fasta</option> - <option value="fastq">fastq</option> - </param> - - </inputs> - <outputs> - <data format="fastq" name="output" label="output with ${tool.name} on ${on_string}" > - <change_format> - <when input="out_format" value="fasta" format="fasta" /> - </change_format> - </data> - </outputs> - <tests> - <test> - <param name="input" value="output_obiclean_advanced.fasta" /> - <param name="key" value="count"/> - <param name="reverse" value="False"/> - <param name="out_format" value="fasta"/> - <output name="output" file="output_obisort.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_obiclean_advanced.fasta" /> - <param name="key" value="count"/> - <param name="reverse" value="True"/> - <param name="out_format" value="fastq"/> - <output name="output" file="output_obisort.fastq" ftype="fastq"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -obisort sorts sequence records according to the value of a given attribute, which can be either numeric or alphanumeric. - -@OBITOOLS_LINK@ - - ]]> - - </help> - <expand macro="citation" /> -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obistat.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,288 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_stat" name="obistat" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>computes basic statistics for attribute values</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - obistat - --without-progress-bar - #for $attribute in $catattributes - #if str( $attribute.options_attributespe.options_attributespe_selector) == "key" - -c '$attribute.options_attributespe.options_catattribute_selector' - #end if - #if str( $attribute.options_attributespe.options_attributespe_selector) == "python" - -c '${attribute.options_attributespe.attribute}' - #end if - #end for - - #if str( $options_attribute.options_attributebe_selector) == "yes" - #if str( $options_attribute.options_attribute_selector) == "min" - -m '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "max" - -M '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "mean" - -a '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "variance" - -v '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "std" - -s '$options_attribute.options_uniq_selector' - #end if - #end if - @INPUT_FORMAT@ - input > '$output' - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" /> - <repeat name="catattributes" title="Category attribute" min="0"> - <conditional name="options_attributespe"> - <param name="options_attributespe_selector" type="select" label="How would you specify the category attribute key?" > - <option value="key" selected="true">simply by a key of an attribute</option> - <option value="python">by a python expression</option> - </param> - <when value="python"> - <param name="attribute" type="text" label="Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. The pattern is case sensitive." > - <expand macro="sanitizer"/> - </param> - </when> - <when value="key"> - <param name="options_catattribute_selector" type="select" label="Attribute used to categorize the sequence records" > - <expand macro="attributes"/> - </param> - </when> - </conditional> - </repeat> - <conditional name="options_attribute"> - <param name="options_attributebe_selector" type="select" label="Use a specific option" > - <option value="None" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="yes"> - <param name="options_attribute_selector" type="select" label="Select your specific option" > - <option value="min" selected="true">min</option> - <option value="max">max</option> - <option value="mean">mean</option> - <option value="variance">variance</option> - <option value="std">standard deviation</option> - </param> - <param name="options_uniq_selector" type="select" label="Attribute to merge" > - <expand macro="attributes"/> - </param> - </when> - <when value="None"></when> - </conditional> - <expand macro="input_format_options_macro"/> - </inputs> - <outputs> - <data format="txt" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="output_obiannotate.fasta" ftype="fasta"/> - <conditional name="catattributes_0|options_attributespe"> - <param name="options_attributespe_selector" value="key"/> - <param name="options_catattribute_selector" value="count" /> - </conditional> - <conditional name="catattributes_1|options_attributespe"> - <param name="options_attributespe_selector" value="key"/> - <param name="options_catattribute_selector" value="merged" /> - </conditional> - <conditional name="options_attribute"> - <param name="options_attributebe_selector" value="yes"/> - <param name="options_attribute_selector" value="min" /> - <param name="options_uniq_selector" value="seq_length" /> - </conditional> - <output name="output" file="output_obistat.txt" ftype="txt" sort="True"/> - </test> - <test> - <param name="input" value="output_obiannotate.fasta.gz" ftype="fasta.gz" /> - <conditional name="catattributes_0|options_attributespe"> - <param name="options_attributespe_selector" value="key"/> - <param name="options_catattribute_selector" value="count" /> - </conditional> - <conditional name="catattributes_1|options_attributespe"> - <param name="options_attributespe_selector" value="key"/> - <param name="options_catattribute_selector" value="merged" /> - </conditional> - <conditional name="options_attribute"> - <param name="options_attributebe_selector" value="yes"/> - <param name="options_attribute_selector" value="min" /> - <param name="options_uniq_selector" value="seq_length" /> - </conditional> - <output name="output" file="output_obistat.txt" ftype="txt" sort="True"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -stats computes basic statistics for attribute values of sequence records. The sequence records can be categorized or not using one or several -c options. By default, only the number of sequence records and the total count are computed for each category. Additional statistics can be computed for attribute values in each category, like: - -\* minimum value (-m option) - -\* maximum value (-M option) - -\* mean value (-a option) - -\* variance (-v option) - -\* standard deviation (-s option) - -The result is a contingency table with the different categories in rows, and the computed statistics in columns. - -@OBITOOLS_LINK@ - - ]]> - </help> - <expand macro="citation" /> -</tool> -======= -<tool id="obi_stat" name="obistat" version="@TOOL_VERSION@"> - <description>computes basic statistics for attribute values</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command> - - <![CDATA[ - - obistat - #for $attribute in $catattributes - #if str( $attribute.options_attributespe.options_attributespe_selector) == "key" - -c '$attribute.options_attributespe.options_catattribute_selector' - #end if - #if str( $attribute.options_attributespe.options_attributespe_selector) == "python" - -c '${attribute.options_attributespe.attribute}' - #end if - #end for - - #if str( $options_attribute.options_attributebe_selector) == "yes" - #if str( $options_attribute.options_attribute_selector) == "min" - -m '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "max" - -M '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "mean" - -a '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "variance" - -v '$options_attribute.options_uniq_selector' - #end if - #if str( $options_attribute.options_attribute_selector) == "std" - -s '$options_attribute.options_uniq_selector' - #end if - #end if - - '$input' > '$output' - - ]]> - - - </command> - - <inputs> - <param name="input" type="data" format="fasta,fastq" label="Input sequences file" /> - - <repeat name="catattributes" title="Category attribute" min="0"> - <conditional name="options_attributespe"> - <param name="options_attributespe_selector" type="select" label="How would you specify the category attribute key?" > - <option value="key" selected="true">simply by a key of an attribute</option> - <option value="python">by a python expression</option> - </param> - <when value="python"> - <param name="attribute" type="text" label="Regular expression pattern matched against the attributes of the sequence record. the value of this attribute is of the form : key:regular_pattern. The pattern is case sensitive." > - <expand macro="sanitizer"/> - </param> - </when> - <when value="key"> - <param name="options_catattribute_selector" type="select" label="Attribute used to categorize the sequence records" > - <expand macro="attributes"/> - </param> - </when> - </conditional> - </repeat> - - <conditional name="options_attribute"> - <param name="options_attributebe_selector" type="select" label="Use a specific option" > - <option value="None" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="yes"> - <param name="options_attribute_selector" type="select" label="Select your specific option" > - <option value="min" selected="true">min</option> - <option value="max">max</option> - <option value="mean">mean</option> - <option value="variance">variance</option> - <option value="std">standard deviation</option> - </param> - <param name="options_uniq_selector" type="select" label="Attribute to merge" > - <expand macro="attributes"/> - </param> - </when> - <when value="None"></when> - </conditional> - </inputs> - <outputs> - <data format="txt" name="output" label="output.txt with ${tool.name} on ${on_string}" /> - </outputs> - <tests> - <test> - <param name="input" value="output_obiannotate.fasta" /> - <conditional name="catattributes_0|options_attributespe"> - <param name="options_attributespe_selector" value="key"/> - <param name="options_catattribute_selector" value="count" /> - </conditional> - <conditional name="catattributes_1|options_attributespe"> - <param name="options_attributespe_selector" value="key"/> - <param name="options_catattribute_selector" value="merged" /> - </conditional> - <conditional name="options_attribute"> - <param name="options_attributebe_selector" value="yes"/> - <param name="options_attribute_selector" value="min" /> - <param name="options_uniq_selector" value="seq_length" /> - </conditional> - <output name="output" file="output_obistat.txt" ftype="txt" sort="True"/> - </test> - </tests> - <help><![CDATA[ - -.. class:: infomark - -**What it does** - -stats computes basic statistics for attribute values of sequence records. The sequence records can be categorized or not using one or several -c options. By default, only the number of sequence records and the total count are computed for each category. Additional statistics can be computed for attribute values in each category, like: - -\* minimum value (-m option) - -\* maximum value (-M option) - -\* mean value (-a option) - -\* variance (-v option) - -\* standard deviation (-s option) - -The result is a contingency table with the different categories in rows, and the computed statistics in columns. - -@OBITOOLS_LINK@ - - ]]> - </help> - <expand macro="citation" /> -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obitab.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_tab" name="obitab" version="@TOOL_VERSION@" profile="@PROFILE@"> - <description>converts sequence file to a tabular file</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - obitab - --without-progress-bar - #if $additional_file - --omit-attribute='$additional_file' - #end if - ${output_seq_option} - ${no_definition_option} - @INPUT_FORMAT@ - input > '$output' - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@,txt,tabular" label="Input sequences file" /> - <param name="output_seq_option" type="boolean" checked="false" truevalue="-o" falsevalue="" label="Do you want to add an extra column?" help="Use this option if you want to add an extra column at the end of the table for the sequence itself"/> - <param name="no_definition_option" type="boolean" checked="false" truevalue="-d" falsevalue="" label="Do you want to remove column containing the sequence definition?" help="Use this option if you want to remove column containing the sequence definition in the output tab file"/> - <param name="additional_file" type="data" optional="true" format="tabular" label="filter attributes from tabular file" /> - <expand macro="input_format_options_macro"/> - </inputs> - <outputs> - <data format="tabular" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="output_obisort.fastq" /> - <param name="output_seq_option" value="False"/> - <param name="no_definition_option" value="True"/> - <output name="output" file="output_obitab.txt" ftype="tabular"/> - </test> - </tests> - <help><![CDATA[ - - -.. class:: infomark - -**What it does** - -obitab command converts sequence file to a tabular file that can be open by a spreadsheet program or R - -@OBITOOLS_LINK@ - - ]]> - </help> - <expand macro="citation" /> -</tool> -======= -<tool id="obi_tab" name="obitab" version="@TOOL_VERSION@"> - <description>converts sequence file to a tabular file that can be open by a spreadsheet program or R</description> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - - <command> - - <![CDATA[ - obitab - #if $additional_file - --omit-attribute='$additional_file' - #end if - ${output_seq_option} - ${no_definition_option} - ${options_inputtype} - ${options_seqtype} - - '$input' > '$output' - ]]> - - </command> - - <inputs> - <param name="input" type="data" format="fastq,fasta,txt,tabular" label="Input sequences file" /> - <param name="output_seq_option" type="boolean" checked="false" truevalue="-o" falsevalue="" label="Do you want to add an extra column?" help="Use this option if you want to add an extra column at the end of the table for the sequence itself"/> - <param name="no_definition_option" type="boolean" checked="false" truevalue="-d" falsevalue="" label="Do you want to remove column containing the sequence definition?" help="Use this option if you want to remove column containing the sequence definition in the output tab file"/> - <param name="additional_file" type="data" optional="true" format="tabular" label="filter attributes from tabular file" /> - <param name="options_inputtype" type="select" label="Specify the input datatype"> - <expand macro="inputtype"/> - </param> - <param name="options_seqtype" type="select" label="Specify the sequence datatype" > - <option value="--nuc" selected="true">nucleic</option> - <option value="--prot">protein</option> - </param> - </inputs> - <outputs> - <data format="tabular" name="output" label="output.tabular with ${tool.name} on ${on_string}" /> - </outputs> - <tests> - <test> - <param name="input" value="output_obisort.fastq" /> - <param name="output_seq_option" value="False"/> - <param name="no_definition_option" value="True"/> - <param name="options_inputtype" value="fasta"/> - <param name="options_seqtype" value="--prot"/> - <output name="output" file="output_obitab.txt" ftype="tabular"/> - </test> - </tests> - <help><![CDATA[ - - -.. class:: infomark - -**What it does** - -obitab command converts sequence file to a tabular file that can be open by a spreadsheet program or R - -@OBITOOLS_LINK@ - - ]]> - </help> - <expand macro="citation" /> - </tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/obiuniq.xml.orig Wed Sep 01 07:54:50 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,182 +0,0 @@ -<<<<<<< HEAD -<tool id="obi_uniq" name="obiuniq" version="@TOOL_VERSION@" profile="@PROFILE@"> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> - <command><![CDATA[ - @GUNZIP_INPUT@ - - obiuniq - --without-progress-bar - #if str( $options_attribute) == "merge" - -m '$options_uniq' - #else - -c '$options_uniq' - #end if - ${mid} - ${prefix} - @INPUT_FORMAT@ - input - @GZIP_OUTPUT@ - > '$output' - @GENERATE_GALAXY_JSON@ - ]]></command> - <inputs> - <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" /> - <param name="options_uniq" type="select" label="Attribute to merge" > - <expand macro="attributes"/> - </param> - <param name="options_attribute" type="select" label="Use specific option" > - <option value="merge" selected="true">merge</option> - <option value="category_attribute">category_attribute</option> - </param> - <param name="mid" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Add a merged attribute containing the list of sequence record ids merged within this group" /> - <param name="prefix" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Dereplicate through a prefix matching" /> - <expand macro="input_format_options_macro"/> - </inputs> - <outputs> - <data format="auto" name="output"/> - </outputs> - <tests> - <test> - <param name="input" value="output_ngsfilter_error_3.fastq" ftype="fastqsanger"/> - <param name="options_uniq" value="sample" /> - <param name="options_attribute" value="merge" /> - <param name="mid" value="False" /> - <param name="prefix" value="False" /> - <output name="output" file="output_obiuniq.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_ngsfilter_error_3.fastq.gz" ftype="fastqsanger.gz"/> - <param name="options_uniq" value="family" /> - <param name="options_attribute" value="category_attribute" /> - <param name="mid" value="True" /> - <param name="prefix" value="True" /> - <output name="output" file="output_obiuniq_family.fasta.gz" ftype="fasta.gz" decompress="true"/> - </test> - </tests> - <help><![CDATA[ - .. class:: infomark - - **What it does** - - The obiuniq command is in some way analog to the standard Unix uniq -c command. - - Instead of working text line by text line as the standard Unix tool, the processing is done on sequence records. - - A sequence record is a complex object composed of an identifier, a set of attributes (key=value), a definition, and the sequence itself. - - The obiuniq command groups together sequence records. Then, for each group, a sequence record is printed. - - A group is defined by the sequence and optionally by the values of a set of attributes specified with the -c option. - - As the identifier, the set of attributes (key=value) and the definition of the sequence records that are grouped together may be different, two options (-m and -i) allow refining how these parts of the records are reported. - - \* By default, only attributes with identical values within a group of sequence records are kept. - - \* A count attribute is set to the total number of sequence records for each group. - - \* For each attribute specified by the -m option, a new attribute whose key is prefixed by merged_ is created. These new attributes contain the number of times each value occurs within the group of sequence records. - - - @OBITOOLS_LINK@ - ]]></help> - <expand macro="citation" /> -</tool> -======= -<tool id="obi_uniq" name="obiuniq" version="@TOOL_VERSION@"> - <expand macro="bio_tools"/> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"/> - <expand macro="stdio"/> -<command> - - <![CDATA[ - - obiuniq - #if str( $options_attribute) == "merge" - -m '$options_uniq' - #else - -c '$options_uniq' - #end if - ${mid} - ${prefix} - '$input' > '$output' - - ]]> - -</command> - -<inputs> - <param name="input" type="data" format="fasta,fastq" label="Input sequences file" /> - <param name="options_uniq" type="select" label="Attribute to merge" > - <expand macro="attributes"/> - </param> - <param name="options_attribute" type="select" label="Use specific option" > - <option value="merge" selected="true">merge</option> - <option value="category_attribute">category_attribute</option> - </param> - <param name="mid" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Add a merged attribute containing the list of sequence record ids merged within this group" /> - <param name="prefix" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Dereplicate through a prefix matching" /> -</inputs> -<outputs> - <data format="fasta" name="output" label="output.fasta with ${tool.name} on ${on_string}" /> -</outputs> - -<tests> - <test> - <param name="input" value="output_ngsfilter_error_3.fastq" /> - <param name="options_uniq" value="sample" /> - <param name="options_attribute" value="merge" /> - <param name="mid" value="False" /> - <param name="prefix" value="False" /> - <output name="output" file="output_obiuniq.fasta" ftype="fasta"/> - </test> - <test> - <param name="input" value="output_ngsfilter_error_3.fastq" /> - <param name="options_uniq" value="family" /> - <param name="options_attribute" value="category_attribute" /> - <param name="mid" value="True" /> - <param name="prefix" value="True" /> - <output name="output" file="output_obiuniq_family.fasta" ftype="fasta"/> - </test> - -</tests> -<help><![CDATA[ - -.. class:: infomark - -**What it does** - -The obiuniq command is in some way analog to the standard Unix uniq -c command. - -Instead of working text line by text line as the standard Unix tool, the processing is done on sequence records. - -A sequence record is a complex object composed of an identifier, a set of attributes (key=value), a definition, and the sequence itself. - -The obiuniq command groups together sequence records. Then, for each group, a sequence record is printed. - -A group is defined by the sequence and optionally by the values of a set of attributes specified with the -c option. - -As the identifier, the set of attributes (key=value) and the definition of the sequence records that are grouped together may be different, two options (-m and -i) allow refining how these parts of the records are reported. - -\* By default, only attributes with identical values within a group of sequence records are kept. - -\* A count attribute is set to the total number of sequence records for each group. - -\* For each attribute specified by the -m option, a new attribute whose key is prefixed by merged_ is created. These new attributes contain the number of times each value occurs within the group of sequence records. - - -@OBITOOLS_LINK@ - - - -]]> -</help> -<expand macro="citation" /> -</tool> ->>>>>>> 7abad681f (add tools up until P)
--- a/test-data/input_ngsfilter_extrafile.txt Wed Sep 01 07:54:50 2021 +0000 +++ b/test-data/input_ngsfilter_extrafile.txt Thu Oct 30 16:01:34 2025 +0000 @@ -1,4 +1,4 @@ -wolf_diet 13a_F730603 aattaac TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ -wolf_diet 15a_F730814 gaagtag TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ -wolf_diet 26a_F040644 gaatatc TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ -wolf_diet 29a_F260619 gcctcct TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ +wolf_diet 13a_F730603 aattaac TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ +wolf_diet 15a_F730814 gaagtag TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ +wolf_diet 26a_F040644 gaatatc TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ +wolf_diet 29a_F260619 gcctcct TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @
