Mercurial > repos > iuc > mash_paste
comparison mash_sketch.xml @ 0:1aab9cf41bd0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit f4e45447b1a8f2d99f3feda11a7c7d2bc7f84a57
| author | iuc |
|---|---|
| date | Wed, 24 Sep 2025 13:46:33 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1aab9cf41bd0 |
|---|---|
| 1 <tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@"> | |
| 2 <description>Create a reduced sequence representation based on min-hashes</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="xref"/> | |
| 7 <expand macro="requirements" /> | |
| 8 <expand macro="version_command" /> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 ## Stage every input as a symlink named after its element identifier (presumably so mash labels the sketch with the dataset name - confirm). Forward and reverse reads must get distinct link names. | |
| 11 #if str ( $reads_assembly.reads_assembly_selector ) == "reads": | |
| 12 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired": | |
| 13 ln -s '$reads_assembly.reads_input.reads_1' "${reads_assembly.reads_input.reads_1.element_identifier}" && | |
| 14 ln -s '$reads_assembly.reads_input.reads_2' "${reads_assembly.reads_input.reads_2.element_identifier}" && | |
| 15 #end if | |
| 16 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection": | |
| 17 ln -s '$reads_assembly.reads_input.reads.forward' "${reads_assembly.reads_input.reads.forward.element_identifier}" && | |
| 18 ln -s '$reads_assembly.reads_input.reads.reverse' "${reads_assembly.reads_input.reads.reverse.element_identifier}" && | |
| 19 #end if | |
| 20 #if str( $reads_assembly.reads_input.reads_input_selector ) == "single": | |
| 21 ln -s '$reads_assembly.reads_input.reads' "${reads_assembly.reads_input.reads.element_identifier}" && | |
| 22 #end if | |
| 23 #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly": | |
| 24 ln -s '${reads_assembly.assembly}' "${reads_assembly.assembly.element_identifier}" && | |
| 25 #end if | |
| 26 ## Build the mash sketch call: shared options first, then the reads-only (-m, -r, -c, -g) or assembly-only (-p, -i) options followed by the staged link names. | |
| 27 mash sketch | |
| 28 -s '${sketch_size}' | |
| 29 -k '${kmer_size}' | |
| 30 -w '${prob_threshold}' | |
| 31 #if str( $reads_assembly.reads_assembly_selector ) == "reads": | |
| 32 -m '${reads_assembly.minimum_kmer_copies}' | |
| 33 -r | |
| 34 #if $reads_assembly.target_coverage : | |
| 35 -c '${reads_assembly.target_coverage}' | |
| 36 #end if | |
| 37 #if $reads_assembly.genome_size : | |
| 38 -g '${reads_assembly.genome_size}' | |
| 39 #end if | |
| 40 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" : | |
| 41 "${reads_assembly.reads_input.reads_1.element_identifier}" "${reads_assembly.reads_input.reads_2.element_identifier}" | |
| 42 #end if | |
| 43 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection": | |
| 44 "${reads_assembly.reads_input.reads.forward.element_identifier}" "${reads_assembly.reads_input.reads.reverse.element_identifier}" | |
| 45 #end if | |
| 46 #if str( $reads_assembly.reads_input.reads_input_selector ) == "single": | |
| 47 "${reads_assembly.reads_input.reads.element_identifier}" | |
| 48 #end if | |
| 49 #elif str( $reads_assembly.reads_assembly_selector ) == "assembly": | |
| 50 -p \${GALAXY_SLOTS:-1} | |
| 51 ${reads_assembly.individual_sequences} | |
| 52 "${reads_assembly.assembly.element_identifier}" | |
| 53 #end if | |
| 54 -o 'sketch' | |
| 55 ]]></command> | |
| 56 <inputs> | |
| 57 <conditional name="reads_assembly"> | |
| 58 <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies"> | |
| 59 <option selected="True" value="reads">Reads</option> | |
| 60 <option value="assembly">Assembly</option> | |
| 61 </param> | |
| 62 <when value="reads"> | |
| 63 <conditional name="reads_input"> | |
| 64 <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> | |
| 65 <option value="paired">Paired</option> | |
| 66 <option value="single">Single</option> | |
| 67 <option value="paired_collection">Paired Collection</option> | |
| 68 </param> | |
| 69 <when value="paired"> | |
| 70 <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/> | |
| 71 <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/> | |
| 72 </when> | |
| 73 <when value="single"> | |
| 74 <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/> | |
| 75 </when> | |
| 76 <when value="paired_collection"> | |
| 77 <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> | |
| 78 </when> | |
| 79 </conditional> | |
| 80 <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/> | |
| 81 <param type="integer" name="target_coverage" argument="-c" value="" min="0" max="500" optional="true" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/> | |
| 82 <param type="integer" name="genome_size" argument="-g" value="" min="1000" max="100000000000" optional="true" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/> | |
| 83 </when> | |
| 84 <when value="assembly"> | |
| 85 <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/> | |
| 86 <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/> | |
| 87 </when> | |
| 88 </conditional> | |
| 89 <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/> | |
| 90 <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" /> | |
| 91 <param type="float" name="prob_threshold" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size" /> | |
| 92 </inputs> | |
| 93 <outputs> <!-- Single sketch output collected from the job working directory. --> | |
| 94 <data name="sketch" format="msh" from_work_dir="sketch.msh"/> <!-- The command passes -o 'sketch'; presumably mash appends the .msh extension itself (confirm against mash docs). --> | |
| 95 </outputs> | |
| 96 <tests> | |
| 97 <test expect_num_outputs="1"> | |
| 98 <conditional name="reads_assembly"> | |
| 99 <param name="reads_assembly_selector" value="reads"/> | |
| 100 <conditional name="reads_input"> | |
| 101 <param name="reads_input_selector" value="single"/> | |
| 102 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 103 </conditional> | |
| 104 </conditional> | |
| 105 <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" /> | |
| 106 </test> | |
| 107 <test expect_num_outputs="1"> | |
| 108 <conditional name="reads_assembly"> | |
| 109 <param name="reads_assembly_selector" value="reads"/> | |
| 110 <conditional name="reads_input"> | |
| 111 <param name="reads_input_selector" value="single"/> | |
| 112 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 113 </conditional> | |
| 114 <param name="minimum_kmer_copies" value="10"/> | |
| 115 </conditional> | |
| 116 <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" /> | |
| 117 </test> | |
| 118 <test expect_num_outputs="1"> | |
| 119 <conditional name="reads_assembly"> | |
| 120 <param name="reads_assembly_selector" value="reads"/> | |
| 121 <conditional name="reads_input"> | |
| 122 <param name="reads_input_selector" value="single"/> | |
| 123 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 124 </conditional> | |
| 125 <param name="target_coverage" value="1"/> | |
| 126 </conditional> | |
| 127 <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" /> | |
| 128 </test> | |
| 129 <test expect_num_outputs="1"> | |
| 130 <conditional name="reads_assembly"> | |
| 131 <param name="reads_assembly_selector" value="reads"/> | |
| 132 <conditional name="reads_input"> | |
| 133 <param name="reads_input_selector" value="single"/> | |
| 134 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 135 </conditional> | |
| 136 <param name="genome_size" value="1000"/> | |
| 137 </conditional> | |
| 138 <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" /> | |
| 139 </test> | |
| 140 <test expect_num_outputs="1"> | |
| 141 <conditional name="reads_assembly"> | |
| 142 <param name="reads_assembly_selector" value="reads"/> | |
| 143 <conditional name="reads_input"> | |
| 144 <param name="reads_input_selector" value="single"/> | |
| 145 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 146 </conditional> | |
| 147 </conditional> | |
| 148 <param name="sketch_size" value="500"/> | |
| 149 <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" /> | |
| 150 </test> | |
| 151 <test expect_num_outputs="1"> | |
| 152 <conditional name="reads_assembly"> | |
| 153 <param name="reads_assembly_selector" value="reads"/> | |
| 154 <conditional name="reads_input"> | |
| 155 <param name="reads_input_selector" value="single"/> | |
| 156 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 157 </conditional> | |
| 158 </conditional> | |
| 159 <param name="kmer_size" value="17"/> | |
| 160 <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" /> | |
| 161 </test> | |
| 162 <test expect_num_outputs="1"> <!-- Single-end reads with a non-default prob_threshold (0.1). NOTE(review): the expected file test_06_mash_sketch.msh is also used by the preceding kmer_size test; confirm the reuse is intentional. --> | |
| 163 <conditional name="reads_assembly"> | |
| 164 <param name="reads_assembly_selector" value="reads"/> | |
| 165 <conditional name="reads_input"> | |
| 166 <param name="reads_input_selector" value="single"/> | |
| 167 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> | |
| 168 </conditional> | |
| 169 </conditional> | |
| 170 <param name="prob_threshold" value="0.1"/> | |
| 171 <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" /> | |
| 172 </test> | |
| 173 <test expect_num_outputs="1"> | |
| 174 <conditional name="reads_assembly"> | |
| 175 <param name="reads_assembly_selector" value="assembly"/> | |
| 176 <param name="assembly" value="test_assembly.fasta"/> | |
| 177 </conditional> | |
| 178 <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" /> | |
| 179 </test> | |
| 180 </tests> | |
| 181 <help><![CDATA[ | |
| 182 | |
| 183 **What it does** | |
| 184 | |
| 185 Create a sketch file, which is a reduced representation of a sequence or set | |
| 186 of sequences (based on min-hashes) that can be used for fast distance | |
| 187 estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can | |
| 188 be given to read from standard input. Input files can also be files of file | |
| 189 names (see -l). For output, one sketch file will be generated, but it can have | |
| 190 multiple sketches within it, divided by sequences or files (see -i). By | |
| 191 default, the output file name will be the first input file with a '.msh' | |
| 192 extension, or 'stdin.msh' if standard input is used (see -o). | |
| 193 ]]></help> | |
| 194 <expand macro="citations"/> | |
| 195 </tool> |
