Mercurial > repos > iuc > seqkit_locate
comparison seqkit_locate.xml @ 0:e4c45e798464 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqkit commit 202bb1229cb0b8e8040a87d140edb6fdf7654628
| author | iuc |
|---|---|
| date | Thu, 03 Nov 2022 19:33:06 +0000 |
| parents | |
| children | 592e41e46dad |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e4c45e798464 |
|---|---|
| 1 <tool id="seqkit_locate" name="SeqKit locate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>subsequences/motifs, mismatch allowed</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="bio_tools"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 #import re | |
| 10 | |
| 11 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) | |
| 12 ln -s '${input}' '${input_identifier}' && | |
| 13 | |
| 14 seqkit locate | |
| 15 --threads \${GALAXY_SLOTS:-4} | |
| 16 #if $conditional_pattern.mode == 'expression' | |
| 17 --pattern '"$conditional_pattern.pattern"' | |
| 18 $conditional_pattern.use_regexp | |
| 19 #else | |
| 20 --pattern-file '$conditional_pattern.pattern_file' | |
| 21 #end if | |
| 22 $output_mode | |
| 23 --validate-seq-length $advanced_options.validate_seq_length | |
| 24 $advanced_options.circular | |
| 25 $advanced_options.degenerate | |
| 26 $advanced_options.hide_matched | |
| 27 $advanced_options.ignore_case | |
| 28 #if not $advanced_options.degenerate | |
| 29 --max-mismatch $advanced_options.max_mismatch | |
| 30 $advanced_options.use_fmi | |
| 31 #end if | |
| 32 $advanced_options.non_greedy | |
| 33 $advanced_options.only_positive_strand | |
| 34 $advanced_options.id_ncbi | |
| 35 '${input_identifier}' | |
| 36 > '$output' | |
| 37 ]]></command> | |
| 38 <inputs> | |
| 39 <param name="input" type="data" format="fasta,fasta.gz" label="Input file"/> | |
| 40 <conditional name="conditional_pattern"> | |
| 41 <param name="mode" type="select" label="Pattern mode" | |
| 42 help="Specify a pattern/motif sequence or a FASTA file with the motif of interest. Motifs could be EITHER plain sequence containing 'ACTGN' OR regular | |
| 43 expression like 'A[TU]G(?:.{3})+?[TU](?:AG|AA|GA)' for ORFs"> | |
| 44 <option value="expression">Pattern/motif sequence</option> | |
| 45 <option value="file">FASTA file with the pattern/motif of interest</option> | |
| 46 </param> | |
| 47 <when value="expression"> | |
| 48 <param argument="--pattern" type="text" value="" label="Pattern/motif sequence" help="Perl regular expressions are allowed"> | |
| 49 <sanitizer invalid_char=""> | |
| 50 <valid initial="string.letters,string.digits"> | |
| 51 <add value="^"/> | |
| 52 <add value="$"/> | |
| 53 <add value="("/> | |
| 54 <add value=")"/> | |
| 55 <add value="|"/> | |
| 56 <add value="?"/> | |
| 57 <add value="*"/> | |
| 58 <add value="+"/> | |
| 59 <add value="{"/> | |
| 60 <add value="}"/> | |
| 61 <add value="\"/> | |
| 62 <add value="["/> | |
| 63 <add value="]"/> | |
| 64 <add value="."/> | |
| 65 <add value=","/> | |
| 66 </valid> | |
| 67 </sanitizer> | |
| 68 <validator type="regex" message="Pattern must not end with backslash.">.*[^\\]$</validator> | |
| 69 </param> | |
| 70 <param argument="--use-regexp" type="boolean" truevalue="--use-regexp" falsevalue="" checked="false" label="Pattern/motifs are regular expressions"/> | |
| 71 </when> | |
| 72 <when value="file"> | |
| 73 <param argument="--pattern-file" type="data" format="fasta" label="Pattern/motif file"/> | |
| 74 </when> | |
| 75 </conditional> | |
| 76 <param name="output_mode" type="select" label="Output mode"> | |
| 77 <option value="">Tabular (default format)</option> | |
| 78 <option value="--gtf">GTF</option> | |
| 79 <option value="--bed">BED6</option> | |
| 80 </param> | |
| 81 <section name="advanced_options" title="Advanced options"> | |
| 82 <param argument="--validate-seq-length" type="integer" min="0" value="10000" label="Length of the sequence to validate" help="Default: 10000" /> | |
| 83 <param argument="--circular" type="boolean" truevalue="--circular" falsevalue="" checked="false" label="Circular genome" | |
| 84 help="When using flag --circular, the end position of a matched subsequence that crosses the end of the genome sequence will be greater than the sequence length" /> | |
| 85 <param argument="--degenerate" type="boolean" truevalue="--degenerate" falsevalue="" checked="false" label="Pattern/motif contains degenerate bases" | |
| 86 help="Do not use degenerate bases/residues in a regular expression; you need to convert them to regular expression syntax, e.g., change 'N' or 'X' to '.'"/> | |
| 87 <param argument="--hide-matched" type="boolean" truevalue="--hide-matched" falsevalue="" checked="false" label="Do not show matched sequences"/> | |
| 88 <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" checked="false" label="Ignore case"/> | |
| 89 <param argument="--max-mismatch" type="integer" min="0" value="0" label="Maximum mismatch" help="For large genomes like human genome, using mapping/alignment tools would be faster" /> | |
| 90 <param argument="--non-greedy" type="boolean" truevalue="--non-greedy" falsevalue="" checked="false" label="Non-greedy mode" help="Faster, but may miss motifs overlapping with others" /> | |
| 91 <param argument="--only-positive-strand" type="boolean" truevalue="--only-positive-strand" falsevalue="" checked="false" label="Only search on positive strand"/> | |
| 92 <param argument="--use-fmi" type="boolean" truevalue="--use-fmi" falsevalue="" checked="false" label="FM-index" | |
| 93 help="Use FM-index for much faster search of lots of sequence patterns. This option is not compatible with the --degenerate option"/> | |
| 94 <param argument="--id-ncbi" type="boolean" truevalue="--id-ncbi" falsevalue="" checked="false" label="FASTA header is NCBI style" help="Example: >gi|110645304|ref|NC_002516.2| Pseud..." /> | |
| 95 </section> | |
| 96 </inputs> | |
| 97 <outputs> | |
| 98 <data name="output" format="tabular" label="${tool.name} on ${on_string}"> | |
| 99 <change_format> | |
| 100 <when input="output_mode" value="--gtf" format="gtf"/> | |
| 101 <when input="output_mode" value="--bed" format="bed"/> | |
| 102 </change_format> | |
| 103 </data> | |
| 104 </outputs> | |
| 105 <tests> | |
| 106 <test expect_num_outputs="1"> | |
| 107 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
| 108 <conditional name="conditional_pattern"> | |
| 109 <param name="mode" value="expression"/> | |
| 110 <param name="pattern" value="ATAGAT"/> | |
| 111 </conditional> | |
| 112 <section name="advanced_options"> | |
| 113 <param name="max_mismatch" value="1"/> | |
| 114 </section> | |
| 115 <output name="output" file="locate_output1.tabular" ftype="tabular"/> | |
| 116 </test> | |
| 117 <test expect_num_outputs="1"> | |
| 118 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
| 119 <conditional name="conditional_pattern"> | |
| 120 <param name="mode" value="expression"/> | |
| 121 <param name="pattern" value="A[TU]G"/> | |
| 122 <param name="use_regexp" value="true"/> | |
| 123 </conditional> | |
| 124 <param name="output_mode" value="--bed"/> | |
| 125 <section name="advanced_options"> | |
| 126 <param name="circular" value="true"/> | |
| 127 <param name="hide_matched" value="true"/> | |
| 128 <param name="ignore_case" value="true"/> | |
| 129 <param name="only_positive_strand" value="true"/> | |
| 130 <param name="id_ncbi" value="true"/> | |
| 131 </section> | |
| 132 <output name="output" file="locate_output2.bed" ftype="bed"/> | |
| 133 </test> | |
| 134 <test expect_num_outputs="1"> | |
| 135 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
| 136 <conditional name="conditional_pattern"> | |
| 137 <param name="mode" value="file"/> | |
| 138 <param name="pattern_file" value="motif_sequence.fasta"/> | |
| 139 </conditional> | |
| 140 <param name="output_mode" value="--gtf"/> | |
| 141 <section name="advanced_options"> | |
| 142 <param name="use_fmi" value="true"/> | |
| 143 </section> | |
| 144 <output name="output" file="locate_output3.gtf" ftype="gtf"/> | |
| 145 </test> | |
| 146 </tests> | |
| 147 <help> | |
| 148 .. class:: infomark | |
| 149 | |
| 150 **Purpose** | |
| 151 | |
| 152 Locate subsequences/motifs, mismatch allowed. | |
| 153 | |
| 154 ------ | |
| 155 | |
| 156 .. class:: infomark | |
| 157 | |
| 158 **Attention** | |
| 159 | |
| 160 1. Motifs could be EITHER plain sequence containing "ACTGN" OR regular | |
| 161 expression like "A[TU]G(?:.{3})+?[TU](?:AG|AA|GA)" for ORFs. | |
| 162 2. Degenerate bases/residues like "RYMM.." are also supported by flag -d. | |
| 163 But do not use degenerate bases/residues in a regular expression; you need to | |
| 164 convert them to regular expression syntax, e.g., change "N" or "X" to ".". | |
| 165 3. When providing search patterns (motifs) via flag '-p', | |
| 166 please use double quotation marks for patterns containing comma, | |
| 167 e.g., -p '"A{2,}"' or -p "\"A{2,}\"". Because the command line argument | |
| 168 parser accepts comma-separated-values (CSV) for multiple values (motifs). | |
| 169 Patterns in file do not follow this rule. | |
| 170 4. Mismatch is allowed using flag "-m/--max-mismatch", | |
| 171 you can increase the value of "-j/--threads" to accelerate processing. | |
| 172 5. When using flag --circular, the end position of a matched subsequence that | |
| 173 crosses the end of the genome sequence will be greater than the sequence length. | |
| 174 </help> | |
| 175 <expand macro="citations"/> | |
| 176 </tool> | |
| 177 |
