Mercurial > repos > iuc > scoary
comparison scoary.xml @ 0:6e2c24c3490a draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scoary commit ce823d6021a7afbc2c49ba60e32faababaffd870"
| author | iuc |
|---|---|
| date | Sun, 21 Mar 2021 12:21:14 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6e2c24c3490a |
|---|---|
| 1 <tool id="scoary" name="Scoary" version="@TOOL_VERSION@+galaxy0" profile="20.01"> | |
| 2 <description>calculates the associations between all genes in the accessory genome and the traits</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">1.6.16</token> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <requirement type="package" version="@TOOL_VERSION@">scoary</requirement> | |
| 8 </requirements> | |
| 9 <version_command>scoary --version</version_command> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 scoary | |
| 12 ## Build the Scoary call from the selected inputs, then post-process its outputs for Galaxy. | |
| 13 ########### | |
| 14 ## Input ## | |
| 15 ########### | |
| 16 | |
| 17 -t '$input_traits' | |
| 18 -g '$input_genes' | |
| 19 | |
| 20 #if $input_restricts: | |
| 21 -r '$input_restricts' | |
| 22 #end if | |
| 23 | |
| 24 ######################## | |
| 25 ## Additional Options ## | |
| 26 ######################## | |
| 27 | |
| 28 #if len($additional_options.series_pc) != 0 | |
| 29 -p #echo " ".join([ "'%s'" % $s.pvalue for $i, $s in enumerate($additional_options.series_pc) ]) | |
| 30 -c #echo " ".join([ "'%s'" % $s.correction for $i, $s in enumerate($additional_options.series_pc) ]) | |
| 31 #end if | |
| 32 ## Emit the integer values directly: a str(...) wrapper is plain template text and would reach the shell literally. | |
| 33 #if $additional_options.permute != 0: | |
| 34 -e $additional_options.permute | |
| 35 #end if | |
| 36 | |
| 37 #if $additional_options.maxhits != 0: | |
| 38 -m $additional_options.maxhits | |
| 39 #end if | |
| 40 | |
| 41 $additional_options.collapse | |
| 42 $output_options.upgma | |
| 43 ## input_newicktree is declared inside the additional_options section, so it is referenced by its qualified name. | |
| 44 #if $additional_options.input_newicktree: | |
| 45 -n '$additional_options.input_newicktree' | |
| 46 #end if | |
| 47 | |
| 48 ######### | |
| 49 ## END ## | |
| 50 ######### | |
| 51 ## The tree is renamed only when -u was requested; otherwise no .nwk is expected and an unconditional mv would fail the job. | |
| 52 --no-time && | |
| 53 tail -n +1 *.csv | sed "s/\,/\\t/g" > scoary_output.tsv | |
| 54 #if $output_options.upgma: | |
| 55 && mv *.nwk scoary_output.nwk | |
| 56 #end if | |
| 57 ]]></command> | |
| 58 <inputs> | |
| 59 <param name="input_traits" argument="-t" type="data" format="csv" label="Trait table"/> | |
| 60 <param name="input_genes" argument="-g" type="data" format="csv" label="Gene Presence/Absence table from ROARY (default output)"/> | |
| 61 <param name="input_restricts" optional="true" argument="-r" type="data" format="tabular" label="Table to analyze a subset of strains" /> | |
| 62 | |
| 63 <!-- Additional Options --> | |
| 64 <section name="additional_options" title="Additional Options"> | |
| 65 <repeat name="series_pc" title="P-value cutoff(s) and Correction(s)"> | |
| 66 <param name="pvalue" argument="-p" type="float" min="0" max="1.0" value="0.05" label="P-value cutoff for one Trait" help="SCOARY will not report genes with higher p-values than this (Default=1.0=All). Provide a single value (applied to all) or exactly as many values as correction criteria and in corresponding order (e.g., 0.05 0.05 for two traits)."/> | |
| 67 <param name="correction" argument="-c" type="select" label="P-value correction" help="Apply the p-value corrections to the p-value cutoffs you have entered (Default = Individual p-value)." > | |
| 68 <option value="I" selected="true">Individual (naive) p-value</option> | |
| 69 <option value="B">Bonferroni adjusted p-value</option> | |
| 70 <option value="BH">Benjamini-Hochberg adjusted p</option> | |
| 71 <option value="PW">Best (lowest) pairwise comparison</option> | |
| 72 <option value="EPW">Entire range of pairwise comparison p-values</option> | |
| 73 <option value="P">Empirical p-value from permutations</option> | |
| 74 </param> | |
| 75 </repeat> | |
| 76 <param name="permute" argument="-e" type="integer" min="0" value="0" label="Permutations" help="Perform N number of permutations of the significant results post-analysis. (Default = 0 = None)" /> | |
| 77 <param name="maxhits" argument="-m" type="integer" min="0" value="0" label="Maximal number of hits to report" help="SCOARY will only report the top max hits results per trait. (Default = 0 = All)" /> | |
| 78 <param name="collapse" argument="--collapse" type="boolean" checked="false" truevalue="--collapse" falsevalue="" label="Collapse correlated genes" help="Collapse correlated genes (genes that have identical distribution patterns in the sample) into merged units. (Default=false)"/> | |
| 79 <param name="input_newicktree" optional="true" argument="-n" type="data" format="newick" label="Supply a custom tree (Newick format) for phylogenetic analyses instead of calculating it internally." /> | |
| 80 </section> | |
| 81 | |
| 82 <!-- Output Options --> | |
| 83 <section name="output_options" title="Output Options" expanded="true"> | |
| 84 <param name="upgma" argument="-u" type="boolean" checked="false" truevalue="-u" falsevalue="" label="UPGMA tree" help="Calculate UPGMA tree to a newick file."/> | |
| 85 </section> | |
| 86 </inputs> | |
| 87 <outputs> <!-- The tabular result is always declared; the Newick tree is only kept when the UPGMA option is enabled (see the filter below). --> | |
| 88 <data name="out_tabular" format="tabular" from_work_dir="scoary_output.tsv" label="${tool.name} on ${on_string}: Table" /> | |
| 89 <data name="out_newick" format="newick" from_work_dir="scoary_output.nwk" label="${tool.name} on ${on_string}: Tree"> | |
| 90 <filter>(output_options['upgma'] is True)</filter> | |
| 91 </data> | |
| 92 </outputs> | |
| 93 <tests> | |
| 94 <test expect_num_outputs="2"> <!-- One p-value/correction pair (0.05, I) with the UPGMA tree enabled. --> | |
| 95 <param name="input_traits" ftype="csv" value="Tetracycline_resistance.csv" /> | |
| 96 <param name="input_genes" ftype="csv" value="Gene_presence_absence.csv" /> | |
| 97 <param name="upgma" value="Yes" /> | |
| 98 <repeat name="series_pc"> | |
| 99 <param name="pvalue" value="0.05"/> | |
| 100 <param name="correction" value="I"/> | |
| 101 </repeat> | |
| 102 <output name="out_tabular" file="scoary_output.tsv" ftype="tabular" sort="true"> | |
| 103 <assert_contents> | |
| 104 <has_n_lines n="573" /> | |
| 105 <has_line line="==> Bogus_trait.results.csv <==" /> | |
| 106 <has_line line="==> Tetracycline_resistance.results.csv <==" /> | |
| 107 </assert_contents> | |
| 108 </output> | |
| 109 <output name="out_newick" file="scoary_output.nwk" ftype="newick" /> | |
| 110 </test> | |
| 111 <test expect_num_outputs="2"> <!-- Two p-value/correction pairs (I and EPW, both 0.05) with the UPGMA tree enabled. --> | |
| 112 <param name="input_traits" ftype="csv" value="Tetracycline_resistance.csv" /> | |
| 113 <param name="input_genes" ftype="csv" value="Gene_presence_absence.csv" /> | |
| 114 <param name="upgma" value="Yes" /> | |
| 115 <repeat name="series_pc"> | |
| 116 <param name="pvalue" value="0.05"/> | |
| 117 <param name="correction" value="I"/> | |
| 118 </repeat> | |
| 119 <repeat name="series_pc"> | |
| 120 <param name="pvalue" value="0.05"/> | |
| 121 <param name="correction" value="EPW"/> | |
| 122 </repeat> | |
| 123 <output name="out_tabular" file="scoary_output_2.tsv" ftype="tabular" sort="true"> | |
| 124 <assert_contents> | |
| 125 <has_n_lines n="27" /> | |
| 126 <has_line line="==> Bogus_trait.results.csv <==" /> | |
| 127 <has_line line="==> Tetracycline_resistance.results.csv <==" /> | |
| 128 </assert_contents> | |
| 129 </output> | |
| 130 <output name="out_newick" file="scoary_output_2.nwk" ftype="newick" /> | |
| 131 </test> | |
| 132 </tests> | |
| 133 | |
| 134 <help><![CDATA[ | |
| 135 | |
| 136 .. class:: infomark | |
| 137 | |
| 138 **What it does** | |
| 139 | |
| 140 ------------------- | |
| 141 | |
| 142 **Scoary** | |
| 143 | |
| 144 Scoary is designed to take the csv file from Roary as well as a traits file created by the user and calculate the associations between all genes in the accessory genome and the traits. It reports a list of genes sorted by strength of association per trait. | |
| 145 | |
| 146 ------------------- | |
| 147 | |
| 148 **Inputs** | |
| 149 | |
| 150 ------------------- | |
| 151 | |
| 152 Scoary requires two input files: csv file from Roary and a list of traits to test associations to. | |
| 153 Traits can be anything as long as you can classify it into binary categories (e.g. antibiotic resistance, group membership (yes/no), MIC value higher/lower than 16). | |
| 154 Make sure your entries are separated by ','. | |
| 155 The traits file needs to be formatted in a specific way (please take a look at the `documentation <https://github.com/AdmiralenOla/Scoary>`__). | |
| 156 | |
| 157 You can also use as input the pan-genome as called from Jason Sahl's program LS-BSR (Large-Scale Blast Score Ratio). | |
| 158 The program includes a python script for converting LS-BSR output to the Roary/Scoary format. | |
| 159 | |
| 160 Trait presence is indicated by 1, trait absence by 0. | |
| 161 Assumes strain names in the first column and trait names in the first row. | |
| 162 | |
| 163 Input gene presence/absence table (comma-separated-values) from ROARY. | |
| 164 Strain names must be equal to those in the trait table. | |
| 165 | |
| 166 ----------- | |
| 167 | |
| 168 **Outputs** | |
| 169 | |
| 170 ----------- | |
| 171 | |
| 172 Scoary outputs a single csv traits file. It uses comma "," as a delimiter. | |
| 173 The results consist of genes that were found to be associated with the trait, sorted according to significance. | |
| 174 By default, Scoary reports all genes with a naive p-value < 0.05. | |
| 175 | |
| 176 You can find the description of the columns in the `documentation <https://github.com/AdmiralenOla/Scoary>`__. | |
| 177 | |
| 178 -------------------- | |
| 179 | |
| 180 **More Information** | |
| 181 | |
| 182 -------------------- | |
| 183 | |
| 184 See the excellent `Scoary documentation`_ | |
| 185 | |
| 186 .. _`Scoary documentation`: https://github.com/AdmiralenOla/Scoary | |
| 187 | |
| 188 | |
| 189 **P-value cutoff (-p)**: For Fisher's, Bonferroni's, and Benjamini-Hochberg's tests, SCOARY will not report genes with higher p-values than this. | |
| 190 For empirical p-values, this is treated as an alpha level instead. | |
| 191 I.e. 0.02 will filter all genes except the lower and upper percentile from this test. | |
| 192 Run with "-p 1.0" to report all genes. Accepts standard form (e.g. 1E-8). | |
| 193 Provide a single value (applied to all) or exactly as many values as correction criteria and in corresponding order (e.g., 0.05 0.1 0.05 0.02). | |
| 194 | |
| 195 **Correction (-c)**: Apply the indicated filtration measure: I=Individual (naive) p-value, B=Bonferroni adjusted p-value, BH=Benjamini-Hochberg adjusted p, PW=Best (lowest) pairwise comparison, EPW=Entire range of pairwise comparison p-values, P=Empirical p-value from permutations. | |
| 196 You can enter as many correction criteria as you would like. | |
| 197 These will be associated with the p-value cutoffs you enter. | |
| 198 For example "-c I EPW -p 0.1 0.05" will apply the following cutoffs: Naive p-value must be lower than 0.1 AND the entire range of pairwise comparison values are below 0.05 for this gene. | |
| 199 Note that the empirical p-values should be interpreted at both tails. | |
| 200 Therefore, running "-c P -p 0.05" will apply an alpha of 0.05 to the empirical (permuted) p-values, i.e. it will filter everything except the upper and lower 2.5 percent of the distribution. | |
| 201 | |
| 202 **Permute (-e)**: Perform N number of permutations of the significant results post-analysis. | |
| 203 Each permutation will do a label switching of the phenotype and a new p-value is calculated according to this new dataset. | |
| 204 After all N permutations are completed, the results are ordered in ascending order, and the percentile of the original result in the permuted p-value distribution is reported. | |
| 205 | |
| 206 -------------------- | |
| 207 | |
| 208 **Galaxy Wrapper Development** | |
| 209 | |
| 210 -------------------- | |
| 211 | |
| 212 Author: Florian Heyl | |
| 213 | |
| 214 ]]></help> | |
| 215 <citations> | |
| 216 <citation type="doi">10.1038/s41467-020-15171-6</citation> | |
| 217 </citations> | |
| 218 </tool> |
