Mercurial > repos > iuc > mash_paste
diff mash_sketch.xml @ 0:1aab9cf41bd0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit f4e45447b1a8f2d99f3feda11a7c7d2bc7f84a57
| author | iuc |
|---|---|
| date | Wed, 24 Sep 2025 13:46:33 +0000 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mash_sketch.xml Wed Sep 24 13:46:33 2025 +0000
@@ -0,0 +1,208 @@
+<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@">
+    <description>Create a reduced sequence representation based on min-hashes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xref"/>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <command detect_errors="exit_code"><![CDATA[
+
+        ## Stage inputs: symlink each selected dataset into the working directory under its element identifier.
+        ## mash is later given these link names rather than the Galaxy dataset paths.
+        #if str ( $reads_assembly.reads_assembly_selector ) == "reads":
+            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired":
+                ## Link each mate under its own identifier; NOTE(review): assumes the two identifiers differ.
+                ln -s '$reads_assembly.reads_input.reads_1' "${reads_assembly.reads_input.reads_1.element_identifier}" &&
+                ln -s '$reads_assembly.reads_input.reads_2' "${reads_assembly.reads_input.reads_2.element_identifier}" &&
+            #end if
+            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
+                ln -s '$reads_assembly.reads_input.reads.forward' "${reads_assembly.reads_input.reads.forward.element_identifier}" &&
+                ln -s '$reads_assembly.reads_input.reads.reverse' "${reads_assembly.reads_input.reads.reverse.element_identifier}" &&
+            #end if
+            #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
+                ln -s '$reads_assembly.reads_input.reads' "${reads_assembly.reads_input.reads.element_identifier}" &&
+            #end if
+        #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly":
+            ln -s '${reads_assembly.assembly}' "${reads_assembly.assembly.element_identifier}" &&
+        #end if
+
+        ## Build the mash sketch call; the options shared by both input modes come first.
+        mash sketch
+            -s '${sketch_size}'
+            -k '${kmer_size}'
+            -w '${prob_threshold}'
+            #if str( $reads_assembly.reads_assembly_selector ) == "reads":
+                ## Options emitted only for read input; -c and -g are added only when a value was supplied.
+                -m '${reads_assembly.minimum_kmer_copies}'
+                -r
+                #if $reads_assembly.target_coverage :
+                    -c '${reads_assembly.target_coverage}'
+                #end if
+                #if $reads_assembly.genome_size :
+                    -g '${reads_assembly.genome_size}'
+                #end if
+                ## Positional inputs: the link names created above (first mate, then second mate for paired data).
+                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" :
+                    "${reads_assembly.reads_input.reads_1.element_identifier}" "${reads_assembly.reads_input.reads_2.element_identifier}"
+                #end if
+                #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
+                    "${reads_assembly.reads_input.reads.forward.element_identifier}" "${reads_assembly.reads_input.reads.reverse.element_identifier}"
+                #end if
+                #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
+                    "${reads_assembly.reads_input.reads.element_identifier}"
+                #end if
+            #elif str( $reads_assembly.reads_assembly_selector ) == "assembly":
+                ## Assembly input: -p takes the GALAXY_SLOTS value, defaulting to 1 when it is unset or empty.
+                -p \${GALAXY_SLOTS:-1}
+                ${reads_assembly.individual_sequences}
+                "${reads_assembly.assembly.element_identifier}"
+            #end if
+            ## Output prefix; the outputs section collects sketch.msh from the working directory.
+            -o 'sketch'
+    ]]></command>
+    <inputs>
+        <!-- Input mode: reads (single, paired, or a paired collection) or an assembly in FASTA format. -->
+        <conditional name="reads_assembly">
+            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
+                <option selected="True" value="reads">Reads</option>
+                <option value="assembly">Assembly</option>
+            </param>
+            <when value="reads">
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
+                        <option value="paired">Paired</option>
+                        <option value="single">Single</option>
+                        <option value="paired_collection">Paired Collection</option>
+                    </param>
+                    <when value="paired">
+                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
+                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+                    </when>
+                    <when value="single">
+                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
+                    </when>
+                    <when value="paired_collection">
+                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+                    </when>
+                </conditional>
+                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
+                <param type="integer" name="target_coverage" argument="-c" value="" min="0" max="500" optional="true" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/>
+                <param type="integer" name="genome_size" argument="-g" value="" min="1000" max="100000000000" optional="true" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/>
+            </when>
+            <when value="assembly">
+                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
+                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/>
+            </when>
+        </conditional>
+        <!-- Options that apply to both input modes. -->
+        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/>
+        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
+        <param type="float" name="prob_threshold" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size" />
+    </inputs>
+    <outputs>
+        <data name="sketch" format="msh" from_work_dir="sketch.msh"/>
+    </outputs>
+    <tests>
+        <!-- Test 1 uses defaults, tests 2-7 each change one option on single-end reads, and the last test sketches an assembly. -->
+        <!-- NOTE(review): no test exercises the paired or paired_collection branches of the command. -->
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+                <param name="minimum_kmer_copies" value="10"/>
+            </conditional>
+            <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+                <param name="target_coverage" value="1"/>
+            </conditional>
+            <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+                <param name="genome_size" value="1000"/>
+            </conditional>
+            <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <param name="sketch_size" value="500"/>
+            <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <param name="kmer_size" value="17"/>
+            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="reads"/>
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" value="single"/>
+                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
+                </conditional>
+            </conditional>
+            <param name="prob_threshold" value="0.1"/>
+            <!-- NOTE(review): shares test_06's expected file with the k-mer size test; confirm this is intentional. -->
+            <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reads_assembly">
+                <param name="reads_assembly_selector" value="assembly"/>
+                <param name="assembly" value="test_assembly.fasta"/>
+            </conditional>
+            <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+    Create a sketch file, which is a reduced representation of a sequence or set
+    of sequences (based on min-hashes) that can be used for fast distance
+    estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can
+    be given to read from standard input. Input files can also be files of file
+    names (see -l). For output, one sketch file will be generated, but it can have
+    multiple sketches within it, divided by sequences or files (see -i). By
+    default, the output file name will be the first input file with a '.msh'
+    extension, or 'stdin.msh' if standard input is used (see -o).
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
