Mercurial > repos > iuc > seqkit_grep
comparison seqkit_grep.xml @ 0:c5d75079615d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqkit commit 76c1a289f15cc9a9a7d9a49dc132af62cc1d5af2
| author | iuc |
|---|---|
| date | Fri, 26 Sep 2025 16:47:08 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c5d75079615d |
|---|---|
| 1 <tool id="seqkit_grep" name="SeqKit grep" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>grep-like tools for FASTA/Q files</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="bio_tools"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 #import re | |
| 10 ## Replace every character that is not whitespace, a word character or '-' with '_' to build the symlink name | |
| 11 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) | |
| 12 ln -s '${input}' '${input_identifier}' && | |
| 13 ## Boolean params expand to their flag or to nothing; --region is a text param, so it is emitted with its flag only when a value was entered | |
| 14 seqkit grep | |
| 15 --threads "\${GALAXY_SLOTS:-4}" | |
| 16 #if $conditional_pattern.mode == 'expression' | |
| 17 --pattern '"$conditional_pattern.pattern"' | |
| 18 $conditional_pattern.use_regexp | |
| 19 #else | |
| 20 --pattern-file '$conditional_pattern.pattern_file' | |
| 21 #end if | |
| 22 $search_options.allow_duplicated_patterns | |
| 23 $search_options.by_name | |
| 24 $search_options.by_seq | |
| 25 $search_options.circular | |
| 26 $search_options.count | |
| 27 $search_options.degenerate | |
| 28 $search_options.delete_matched | |
| 29 $search_options.ignore_case | |
| 30 $search_options.invert_match | |
| 31 #if $search_options.by_seq and not $search_options.degenerate | |
| 32 --max-mismatch $search_options.max_mismatch | |
| 33 #end if | |
| 34 $search_options.only_positive_strand | |
| 35 #if str($search_options.region)# --region '$search_options.region' #end if# | |
| 36 '${input_identifier}' | |
| 37 > '$output' | |
| 38 ]]></command> | |
| 39 <inputs> | |
| 40 <param name="input" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/FASTQ file"/> | |
| 41 <conditional name="conditional_pattern"> | |
| 42 <param name="mode" type="select" label="Pattern mode" help="Specify pattern directly or upload a file with multiple patterns"> | |
| 43 <option value="expression">Pattern/motif sequence</option> | |
| 44 <option value="file">FASTA file with the pattern/motif of interest</option> | |
| 45 </param> | |
| 46 <when value="expression"> | |
| 47 <param argument="--pattern" type="text" label="Search pattern" help="Pattern to search for. Use quotes for special characters when using regex"> | |
| 48 <sanitizer invalid_char=""> | |
| 49 <valid initial="string.letters,string.digits"> | |
| 50 <add value="^"/> | |
| 51 <add value="$"/> | |
| 52 <add value="("/> | |
| 53 <add value=")"/> | |
| 54 <add value="|"/> | |
| 55 <add value="?"/> | |
| 56 <add value="*"/> | |
| 57 <add value="+"/> | |
| 58 <add value="{"/> | |
| 59 <add value="}"/> | |
| 60 <add value="\"/> | |
| 61 <add value="["/> | |
| 62 <add value="]"/> | |
| 63 <add value="."/> | |
| 64 <add value=","/> | |
| 65 <add value=":"/> | |
| 66 </valid> | |
| 67 </sanitizer> | |
| 68 <validator type="regex" message="Pattern must not end with backslash.">.*[^\\]$</validator> | |
| 69 </param> | |
| 70 <param argument="--use-regexp" type="boolean" truevalue="--use-regexp" falsevalue="" checked="false" label="Interpret pattern as regular expression" help="Enable regular expression matching"/> | |
| 71 </when> | |
| 72 <when value="file"> | |
| 73 <param argument="--pattern-file" type="data" format="fasta" label="Pattern/motif file"/> | |
| 74 </when> | |
| 75 </conditional> | |
| 76 <section name="search_options" title="Search options"> | |
| 77 <param argument="--by-name" type="boolean" truevalue="--by-name" falsevalue="" checked="false" label="Search by sequence name" help="match by full name instead of just ID"/> | |
| 78 <param argument="--by-seq" type="boolean" truevalue="--by-seq" falsevalue="" checked="false" label="Search by sequence content" help="search subseq on seq. Both positive and negative strand are searched by default, you might use only-positive-strand. Mismatch allowed using max-mismatch"/> | |
| 79 <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" checked="false" label="Ignore case" help="ignore case"/> | |
| 80 <param argument="--only-positive-strand" type="boolean" truevalue="--only-positive-strand" falsevalue="" checked="false" label="Only search positive strand" help="Only search on positive strand (only applies when searching by sequence)"/> | |
| 81 <param argument="--max-mismatch" type="integer" min="0" value="0" label="Maximum mismatches" help="Maximum number of mismatches allowed (only for sequence search, 0 = exact match)"/> | |
| 82 <param argument="--invert-match" type="boolean" truevalue="--invert-match" falsevalue="" checked="false" label="Invert match" help="invert the sense of matching, to select non-matching records"/> | |
| 83 <param argument="--degenerate" type="boolean" truevalue="--degenerate" falsevalue="" checked="false" label="Pattern contains degenerate bases" help="Pattern contains degenerate bases"/> | |
| 84 <param argument="--circular" type="boolean" truevalue="--circular" falsevalue="" checked="false" label="Circular genome" help="Treat sequences as circular for matching"/> | |
| 85 <param argument="--count" type="boolean" truevalue="--count" falsevalue="" checked="false" label="Count" help="just print a count of matching records. with the -v/--invert-match flag, count non-matching records"/> | |
| 86 <param argument="--delete-matched" type="boolean" truevalue="--delete-matched" falsevalue="" checked="false" label="Delete matched patterns" help="delete a pattern right after being matched, this keeps the firstly matched data and speedups when using regular expressions"/> | |
| 87 <param argument="--allow-duplicated-patterns" type="boolean" truevalue="--allow-duplicated-patterns" falsevalue="" checked="false" label="Allow duplicated patterns" help="output records multiple times when duplicated patterns are given"/> | |
| 88 <param argument="--region" type="text" value="" label="Sequence region" help="Specify region for searching (e.g., 1:30 for first 30 bases, -12:-1 for last 12)."> | |
| 89 <validator type="regex" message="Region must be in format 'start:end' or 'start:' or ':end'">^$|^-?[0-9]*:-?[0-9]*$</validator> | |
| 90 </param> | |
| 91 </section> | |
| 92 </inputs> | |
| 93 <outputs> <!-- Single output; its datatype is copied from the input dataset via format_source. NOTE(review): the count option is described in this file as printing only a count, yet the datatype still follows the input; confirm this is intended. --> | |
| 94 <data name="output" format_source="input" label="${tool.name} on ${on_string}"/> | |
| 95 </outputs> | |
| 96 <tests> | |
| 97 <test expect_num_outputs="1"> | |
| 98 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
| 99 <conditional name="conditional_pattern"> | |
| 100 <param name="mode" value="expression"/> | |
| 101 <param name="pattern" value="ATGC"/> | |
| 102 </conditional> | |
| 103 <section name="search_options"> | |
| 104 <param name="by_seq" value="true"/> | |
| 105 <param name="max_mismatch" value="0"/> | |
| 106 </section> | |
| 107 <output decompress="true" name="output" file="grep_output1.fasta.gz" ftype="fasta.gz"/> | |
| 108 </test> | |
| 109 <test expect_num_outputs="1"> | |
| 110 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
| 111 <conditional name="conditional_pattern"> | |
| 112 <param name="mode" value="file"/> | |
| 113 <param name="pattern_file" value="grep_pattern.fasta"/> | |
| 114 </conditional> | |
| 115 <section name="search_options"> | |
| 116 <param name="invert_match" value="true"/> | |
| 117 </section> | |
| 118 <output decompress="true" name="output" file="grep_output2.fasta.gz" ftype="fasta.gz"/> | |
| 119 </test> | |
| 120 <test expect_num_outputs="1"> | |
| 121 <param name="input" value="input1.fastq.gz" ftype="fastq.gz"/> | |
| 122 <conditional name="conditional_pattern"> | |
| 123 <param name="mode" value="expression"/> | |
| 124 <param name="pattern" value="^5"/> | |
| 125 <param name="use_regexp" value="true"/> | |
| 126 </conditional> | |
| 127 <section name="search_options"> | |
| 128 <param name="by_name" value="true"/> | |
| 129 </section> | |
| 130 <output decompress="true" name="output" file="grep_output3.fastq.gz" ftype="fastq.gz"/> | |
| 131 </test> | |
| 132 <test expect_num_outputs="1"> | |
| 133 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
| 134 <conditional name="conditional_pattern"> | |
| 135 <param name="mode" value="expression"/> | |
| 136 <param name="pattern" value="NNNNATGC"/> | |
| 137 </conditional> | |
| 138 <section name="search_options"> | |
| 139 <param name="by_seq" value="true"/> | |
| 140 <param name="degenerate" value="true"/> | |
| 141 </section> | |
| 142 <output decompress="true" name="output" file="grep_output4.fasta.gz" ftype="fasta.gz"/> | |
| 143 </test> | |
| 144 </tests> | |
| 145 <help> | |
| 146 .. class:: infomark | |
| 147 | |
| 148 **What it does** | |
| 149 | |
| 150 search sequences by ID/name/sequence/sequence motifs, mismatch allowed | |
| 151 | |
| 152 ------ | |
| 153 | |
| 154 .. class:: infomark | |
| 155 | |
| 156 **Attention** | |
| 157 | |
| 158 0. By default, we match sequence ID with patterns, use "-n/--by-name" | |
| 159 for matching full name instead of just ID. | |
| 160 1. Unlike POSIX/GNU grep, we compare the pattern to the whole target | |
| 161 (ID/full header) by default. Please switch "-r/--use-regexp" on | |
| 162 for partial matching. | |
| 163 2. When searching by sequences, matching is partial, and both positive | |
| 164 and negative strands are searched. | |
| 165 Please switch on "-P/--only-positive-strand" if you would like to | |
| 166 search only on the positive strand. | |
| 167 Mismatch is allowed using flag "-m/--max-mismatch", you can increase | |
| 168 the value of "-j/--threads" to accelerate processing. | |
| 169 3. Degenerate bases/residues like "RYMM.." are also supported by flag -d. | |
| 170 But do not use degenerate bases/residues in regular expressions; you need | |
| 171 to convert them to regular expressions, e.g., change "N" or "X" to ".". | |
| 172 4. When providing search patterns (motifs) via flag '-p', | |
| 173 please use double quotation marks for patterns containing comma, | |
| 174 e.g., -p '"A{2,}"' or -p "\"A{2,}\"". Because the command line argument | |
| 175 parser accepts comma-separated-values (CSV) for multiple values (motifs). | |
| 176 Patterns in file do not follow this rule. | |
| 177 5. The order of sequences in result is consistent with that in original | |
| 178 file, not the order of the query patterns. | |
| 179 But for FASTA file, you can use: | |
| 180 seqkit faidx seqs.fasta --infile-list IDs.txt | |
| 181 6. For multiple patterns, you can either set "-p" multiple times, i.e., | |
| 182 -p pattern1 -p pattern2, or give a file of patterns via "-f/--pattern-file". | |
| 183 </help> | |
| 184 <expand macro="citations"/> | |
| 185 </tool> |
