Mercurial > repos > iuc > nanocompore_sampcomp
comparison sampcomp.xml @ 0:952c4ac5ef22 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"
| author | iuc |
|---|---|
| date | Tue, 05 May 2020 11:01:24 +0000 |
| parents | |
| children | e5b88f0e7960 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:952c4ac5ef22 |
|---|---|
| 1 <?xml version="1.0"?> | |
| 2 <tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+@WRAPPER_VERSION@"> | |
| 3 <description>to compare Nanopolished datasets</description> | |
| 4 <macros> | |
| 5 <import>macros.xml</import> | |
| 6 </macros> | |
| 7 <expand macro="requirements"/> | |
| 8 <version_command><![CDATA[nanocompore --version]]></version_command> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 ## initialize | |
| 11 ## requires a minimum of 3 threads: take GALAXY_SLOTS (default 3) but never go below 3 | |
| 12 threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) && | |
| 13 ## same name pattern required: link each data file as <name>.tsv and its index as <name>.tsv.idx | |
| 14 #for $i, $current in enumerate($file1_rep) | |
| 15 ln -s '$current.file' 'sample_1_${i}.tsv' && | |
| 16 ln -s '$current.index' 'sample_1_${i}.tsv.idx' && | |
| 17 #end for | |
| 18 #for $i, $current in enumerate($file2_rep) | |
| 19 ln -s '$current.file' 'sample_2_${i}.tsv' && | |
| 20 ln -s '$current.index' 'sample_2_${i}.tsv.idx' && | |
| 21 #end for | |
| 22 | |
| 23 ## run | |
| 24 nanocompore sampcomp | |
| 25 ## required | |
| 26 --label1 '$label1' | |
| 27 #set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))]) | |
| 28 --file_list1 '$files1' | |
| 29 --label2 '$label2' | |
| 30 #set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))]) | |
| 31 --file_list2 '$files2' | |
| 32 --fasta '$fasta' | |
| 33 ## optional | |
| 34 #if $ap.bed | |
| 35 --bed '$ap.bed' | |
| 36 #end if | |
| 37 --max_invalid_kmers_freq $ap.max_invalid_kmers_freq | |
| 38 --min_coverage $ap.min_coverage | |
| 39 --min_ref_length $ap.min_ref_length | |
| 40 --comparison_methods '$ap.comparison_methods' | |
| 41 --sequence_context $ap.sequence_context | |
| 42 --sequence_context_weights '$ap.sequence_context_weights' | |
| 43 --pvalue_thr $ap.pvalue_thr | |
| 44 $ap.logit | |
| 45 $ap.allow_warnings | |
| 46 --outpath 'results' | |
| 47 --nthreads \$threads | |
| 48 --log_level debug | |
| 49 ## bundle the three database files into one tar, which the out_db output collects | |
| 50 && tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat' | |
| 51 ]]></command> | |
| 52 <inputs> | |
| 53 <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/> | |
| 54 <repeat name="file1_rep" min="1" title="First condition files"> <!-- one data file + index pair per replicate; the command links them as sample_1_<i>.tsv(.idx) --> | |
| 55 <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/> | |
| 56 <param name="index" type="data" format="tabular" label="Select index file"/> | |
| 57 </repeat> | |
| 58 <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/> | |
| 59 <repeat name="file2_rep" min="1" title="Second condition files"> <!-- one data file + index pair per replicate; the command links them as sample_2_<i>.tsv(.idx) --> | |
| 60 <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/> | |
| 61 <param name="index" type="data" format="tabular" label="Select index file"/> | |
| 62 </repeat> | |
| 63 <param argument="--fasta" type="data" format="fasta" label="Select mapping file"/> | |
| 64 | |
| 65 <section name="ap" title="Advanced parameters"> | |
| 66 <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/> | |
| 67 <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max frequency of invalid kmers"/> | |
| 68 <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/> | |
| 69 <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/> | |
| 70 <param argument="--comparison_methods" type="select" multiple="true" label="Select comparison methods"> | |
| 71 <option value="GMM" selected="true">GMM</option> | |
| 72 <option value="KS" selected="true">KS</option> | |
| 73 <option value="TT">TT</option> | |
| 74 <option value="MW">MW</option> | |
| 75 </param> | |
| 76 <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/> | |
| 77 <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values"> | |
| 78 <option value="uniform" selected="true">Uniform</option> | |
| 79 <option value="harmonic">Harmonic</option> | |
| 80 </param> | |
| 81 <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/> | |
| 82 <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/> | |
| 83 <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Allow runtime warnings during the ANOVA tests instead of raising an error?"/> <!-- checked = flag is passed = warnings are tolerated --> | |
| 84 <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)"> <!-- not passed to nanocompore; only read by the output filters --> | |
| 85 <option value="results" selected="true">Results</option> | |
| 86 <option value="shift" selected="true">Shift stats</option> | |
| 87 <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option> | |
| 88 <option value="log">Log</option> | |
| 89 </param> | |
| 90 </section> | |
| 91 </inputs> | |
| 92 <outputs> <!-- each dataset is created only when its key is selected in the ap 'out' parameter --> | |
| 93 <data name="out_results" format="tabular" from_work_dir="results/out_nanocompore_results.tsv" label="${tool.name} on ${on_string}: Results"> | |
| 94 <filter>'results' in ap['out']</filter> | |
| 95 </data> | |
| 96 <data name="out_shift" format="tabular" from_work_dir="results/out_nanocompore_shift_stats.tsv" label="${tool.name} on ${on_string}: Shift stats"> | |
| 97 <filter>'shift' in ap['out']</filter> | |
| 98 </data> | |
| 99 <data name="out_db" format="tar" from_work_dir="results/db.tar" label="${tool.name} on ${on_string}: Database"> | |
| 100 <filter>'db' in ap['out']</filter> | |
| 101 </data> | |
| 102 <data name="out_log" format="txt" from_work_dir="results/out_SampComp.log" label="${tool.name} on ${on_string}: log"> | |
| 103 <filter>'log' in ap['out']</filter> | |
| 104 </data> | |
| 105 </outputs> | |
| 106 <tests> | |
| 107 <!-- #1: default parameters; expects the three default outputs (results, shift stats, database tar) --> | |
| 108 <test expect_num_outputs="3"> | |
| 109 <repeat name="file1_rep"> | |
| 110 <param name="file" value="sample1.tsv"/> | |
| 111 <param name="index" value="sample1.tsv.idx"/> | |
| 112 </repeat> | |
| 113 <repeat name="file2_rep"> | |
| 114 <param name="file" value="sample2.tsv"/> | |
| 115 <param name="index" value="sample2.tsv.idx"/> | |
| 116 </repeat> | |
| 117 <param name="fasta" value="reference.fa"/> | |
| 118 <output name="out_results"> | |
| 119 <assert_contents> | |
| 120 <has_n_lines n="3"/> | |
| 121 <has_text_matching expression="pos	chr.+"/> | |
| 122 <has_text_matching expression="22102	NA.+"/> | |
| 123 </assert_contents> | |
| 124 </output> | |
| 125 <output name="out_shift"> | |
| 126 <assert_contents> | |
| 127 <has_n_lines n="3"/> | |
| 128 <has_text_matching expression="ref\_id	pos.+"/> | |
| 129 <has_text_matching expression="chr	22102.+"/> | |
| 130 </assert_contents> | |
| 131 </output> | |
| 132 <output name="out_db"> | |
| 133 <assert_contents> | |
| 134 <has_size value="5408256"/> | |
| 135 </assert_contents> | |
| 136 </output> | |
| 137 </test> | |
| 138 <!-- #2: custom labels and every advanced parameter set; expects all four outputs including the log --> | |
| 139 <test expect_num_outputs="4"> | |
| 140 <param name="label1" value="C1"/> | |
| 141 <repeat name="file1_rep"> | |
| 142 <param name="file" value="sample1.tsv"/> | |
| 143 <param name="index" value="sample1.tsv.idx"/> | |
| 144 </repeat> | |
| 145 <param name="label2" value="C2"/> | |
| 146 <repeat name="file2_rep"> | |
| 147 <param name="file" value="sample2.tsv"/> | |
| 148 <param name="index" value="sample2.tsv.idx"/> | |
| 149 </repeat> | |
| 150 <param name="fasta" value="reference.fa"/> | |
| 151 <section name="ap"> | |
| 152 <param name="max_invalid_kmers_freq" value="0.2"/> | |
| 153 <param name="min_coverage" value="31"/> | |
| 154 <param name="min_ref_length" value="101"/> | |
| 155 <param name="comparison_methods" value="GMM,KS,TT,MW"/> | |
| 156 <param name="sequence_context" value="1"/> | |
| 157 <param name="sequence_context_weights" value="harmonic"/> | |
| 158 <param name="pvalue_thr" value="0.06"/> | |
| 159 <param name="logit" value="true"/> | |
| 160 <param name="allow_warnings" value="true"/> | |
| 161 <param name="out" value="results,shift,db,log"/> | |
| 162 </section> | |
| 163 <output name="out_results"> | |
| 164 <assert_contents> | |
| 165 <has_n_lines n="3"/> | |
| 166 <has_text_matching expression="pos	chr.+"/> | |
| 167 <has_text_matching expression="22102	NA.+"/> | |
| 168 </assert_contents> | |
| 169 </output> | |
| 170 <output name="out_shift"> | |
| 171 <assert_contents> | |
| 172 <has_n_lines n="3"/> | |
| 173 <has_text_matching expression="ref\_id	pos.+"/> | |
| 174 <has_text_matching expression="chr	22102.+"/> | |
| 175 </assert_contents> | |
| 176 </output> | |
| 177 <output name="out_db"> | |
| 178 <assert_contents> | |
| 179 <has_size value="5410304"/> | |
| 180 </assert_contents> | |
| 181 </output> | |
| 182 <output name="out_log"> | |
| 183 <assert_contents> | |
| 184 <has_n_lines n="31"/> | |
| 185 <has_text_matching expression=".+package\_name.+"/> | |
| 186 </assert_contents> | |
| 187 </output> | |
| 188 </test> | |
| 189 </tests> | |
| 190 <help><![CDATA[ | |
| 191 .. class:: infomark | |
| 192 | |
| 193 **What it does** | |
| 194 | |
| 195 @WID@ | |
| 196 | |
| 197 SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters. | |
| 198 | |
| 199 First, SampComp parses the sample eventalign collapse files and then the observed results are piled-up per reference at position level. In a second step, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage. | |
| 200 | |
| 201 **Input** | |
| 202 | |
| 203 SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. Two conditions are expected and at least 2 replicates per condition are highly recommended. | |
| 204 | |
| 205 A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step. | |
| 206 | |
| 207 Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space. | |
| 208 | |
| 209 **Output** | |
| 210 | |
| 211 The database object returned by SampComp is a Python GDBM object database indexed by reference id and can be used with SampCompDB. | |
| 212 | |
| 213 .. class:: infomark | |
| 214 | |
| 215 **References** | |
| 216 | |
| 217 @REFERENCES@ | |
| 218 ]]></help> | |
| 219 <expand macro="citations"/> | |
| 220 </tool> |
