Mercurial > repos > iuc > nanopolishcomp_eventaligncollapse
comparison eventaligncollapse.xml @ 0:2f4e5f252d6a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ commit e96954b0926211a1da3292c00aada6d366a2b25d"
| author | iuc |
|---|---|
| date | Thu, 30 Apr 2020 09:55:36 +0000 |
| parents | |
| children | f6413cd3287d |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2f4e5f252d6a |
|---|---|
| 1 <?xml version="1.0"?> | |
| 2 <tool id="nanopolishcomp_eventaligncollapse" name="Eventalign Collapse" version="@TOOL_VERSION@+@WRAPPER_VERSION@"> | |
| 3 <description>by kmers rather than by event</description> | |
| 4 <macros> | |
| 5 <import>macros.xml</import> | |
| 6 </macros> | |
| 7 <expand macro="requirements"/> | |
| 8 <stdio></stdio> | |
| 9 <version_command>NanopolishComp --version</version_command> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 ## initialize | |
| 12 ## requires a minimum of 3 threads | |
| 13 threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) && | |
| 14 | |
| 15 ## run; $s expands to '-s' only when the boolean 's' input is enabled (its falsevalue is empty) | |
| 16 NanopolishComp | |
| 17 Eventalign_collapse | |
| 18 -i '$i' | |
| 19 $s | |
| 20 -r $r | |
| 21 -f | |
| 22 #for $current in $f | |
| 23 $current | |
| 24 #end for | |
| 25 -o 'results' | |
| 26 -p 'out' | |
| 27 -t \$threads | |
| 28 -v | |
| 29 ]]></command> | |
| 30 <inputs> | |
| 31 <param argument="-i" type="data" format="tabular" label="Select nanopolish eventalign file"/> | |
| 32 <param argument="-s" type="boolean" truevalue="-s" falsevalue="" label="Should raw samples be written?" help="You need to run nanopolish eventalign with the --samples option to make use of this feature."/> | |
| 33 <param argument="-r" type="integer" value="0" min="0" label="Set maximum number of reads to parse" help="Use 0 to deactivate this option."/> | |
| 34 <param argument="-f" type="select" multiple="true" optional="false" label="Select statistical fields to compute" help="You need to run nanopolish eventalign with the --samples option to make use of this feature."> | |
| 35 <option value="mean" selected="true">Mean</option> | |
| 36 <option value="std">Std</option> | |
| 37 <option value="median" selected="true">Median</option> | |
| 38 <option value="mad">Mad</option> | |
| 39 <option value="num_signals" selected="true">Number of signals</option> | |
| 40 </param> | |
| 41 <param name="out" type="select" multiple="true" label="Select output file(s)" help=""> | |
| 42 <option value="eventalign_collapse" selected="true">Eventalign Collapse</option> | |
| 43 <option value="index" selected="true">Index</option> | |
| 44 <option value="log">Log</option> | |
| 45 </param> | |
| 46 </inputs> | |
| 47 <outputs> | |
| 48 <data name="out_eventalign_collapse" format="tabular" from_work_dir="results/out_eventalign_collapse.tsv" label="${tool.name} on ${on_string}: Eventalign Collapse"> | |
| 49 <filter>'eventalign_collapse' in out</filter> | |
| 50 </data> | |
| 51 <data name="out_index" format="tabular" from_work_dir="results/out_eventalign_collapse.tsv.idx" label="${tool.name} on ${on_string}: Index"> | |
| 52 <filter>'index' in out</filter> | |
| 53 </data> | |
| 54 <data name="out_log" format="txt" from_work_dir="results/out_eventalign_collapse.log" label="${tool.name} on ${on_string}: log"> | |
| 55 <filter>'log' in out</filter> | |
| 56 </data> | |
| 57 </outputs> | |
| 58 <tests> | |
| 59 <!-- #1 default --> | |
| 60 <test expect_num_outputs="3"> | |
| 61 <param name="i" value="sample.tsv"/> | |
| 62 <param name="out" value="eventalign_collapse,index,log"/> | |
| 63 <output name="out_eventalign_collapse"> | |
| 64 <assert_contents> | |
| 65 <has_n_lines n="236"/> | |
| 66 <has_text_matching expression="ref_pos	ref_kmer"/> | |
| 67 <has_text_matching expression="22102	GGAAA"/> | |
| 68 </assert_contents> | |
| 69 </output> | |
| 70 <output name="out_index"> | |
| 71 <assert_contents> | |
| 72 <has_n_lines n="60"/> | |
| 73 <has_text_matching expression="ref_id	ref_start"/> | |
| 74 <has_text_matching expression="chr	22102"/> | |
| 75 </assert_contents> | |
| 76 </output> | |
| 77 <output name="out_log"> | |
| 78 <assert_contents> | |
| 79 <has_n_lines n="13"/> | |
| 80 <has_line line="General options:"/> | |
| 81 </assert_contents> | |
| 82 </output> | |
| 83 </test> | |
| 84 <!-- #2 --> | |
| 85 <test expect_num_outputs="2"> | |
| 86 <param name="i" value="sample.tsv"/> | |
| 87 <param name="s" value="true"/> | |
| 88 <param name="r" value="10"/> | |
| 89 <param name="f" value="mean,std,median,mad,num_signals"/> | |
| 90 <param name="out" value="eventalign_collapse,index"/> | |
| 91 <output name="out_eventalign_collapse"> | |
| 92 <assert_contents> | |
| 93 <has_n_lines n="236"/> | |
| 94 <has_text_matching expression="ref_pos	ref_kmer"/> | |
| 95 <has_text_matching expression="22102	GGAAA"/> | |
| 96 </assert_contents> | |
| 97 </output> | |
| 98 <output name="out_index"> | |
| 99 <assert_contents> | |
| 100 <has_n_lines n="60"/> | |
| 101 <has_text_matching expression="ref_id	ref_start"/> | |
| 102 <has_text_matching expression="chr	22102"/> | |
| 103 </assert_contents> | |
| 104 </output> | |
| 105 </test> | |
| 106 </tests> | |
| 107 <help><![CDATA[ | |
| 108 .. class:: infomark | |
| 109 | |
| 110 **What it does** | |
| 111 | |
| 112 @WID@ | |
| 113 | |
| 114 Eventalign_collapse collapses the raw file generated by nanopolish eventalign by kmers rather than by event. | |
| 115 | |
| 116 **Input** | |
| 117 | |
| 118 A nanopolish eventalign tabular output file. | |
| 119 | |
| 120 **Output** | |
| 121 | |
| 122 Contrary to the nanopolish eventalign output text file, in Eventalign_collapse the reads are separated by hashtag headers containing the read_id and ref_id. This reduces the redundancy and makes it easier to find the start and end of a read. | |
| 123 | |
| 124 :: | |
| 125 | |
| 126 Example : #7ef1d7b9-5824-4382-b23b-78d82c07ebbd YHR055C. | |
| 127 | |
| 128 The main data file contains the following fields: | |
| 129 | |
| 130 - ref_pos: Position of the kmer on the reference sequence. | |
| 131 - ref_kmer: Sequence of the reference kmers. | |
| 132 - num_events: Number of events for this kmer before collapsing. | |
| 133 - dwell_time: dwell time for this kmer in seconds | |
| 134 - NNNNN_dwell_time: dwell time of events for this kmer with a model sequence "NNNNN" (events ignored by nanopolish HMM). | |
| 135 - mismatch_dwell_time: dwell time of events for this kmer with a model sequence different from the reference kmer | |
| 136 - start_idx: Only if nanopolish eventalign called with --signal_idx. Start coordinate on original raw signal in fast5 file | |
| 137 - end_idx: Only if nanopolish eventalign called with --signal_idx. End coordinate on original raw signal in fast5 file | |
| 138 - mean: Only if nanopolish eventalign called with --samples. Mean of the normalised signal values provided by Nanopolish eventalign | |
| 139 - median: Only if nanopolish eventalign called with --samples. Median of the normalised signal values provided by Nanopolish eventalign | |
| 140 - std: Only if nanopolish eventalign called with --samples. Standard deviation of the normalised signal values provided by Nanopolish eventalign | |
| 141 - mad: Only if nanopolish eventalign called with --samples. Median absolute deviation of the normalised signal values provided by Nanopolish eventalign | |
| 142 - num_signals: Only if nanopolish eventalign called with --samples. Number of raw signal points. | |
| 143 - samples: Only if nanopolish eventalign called with --samples and Eventalign_collapse called with --write_samples. List of normalised signal intensity values for this kmer | |
| 144 | |
| 145 In addition, Eventalign_collapse also generates a useful index file containing read-level information. It contains the following fields: | |
| 146 | |
| 147 - read_id: Name or index of the read | |
| 148 - ref_id: Name of the reference sequence the read was aligned on (contig) | |
| 149 - ref_start: Start coordinate of the alignment on the reference sequence | |
| 150 - ref_end: End coordinate of the alignment on the reference sequence | |
| 151 - dwell_time: Cumulative dwell time in seconds for the entire resquiggled sequence | |
| 152 - kmers: Overall number of resquiggled kmers | |
| 153 - NNNNN_kmers: Number of resquiggled kmers containing at least 1 event for which the model sequence was "NNNNN" | |
| 154 - mismatching_kmers: Number of resquiggled kmers containing at least 1 event for which the model sequence diverged from the reference sequence | |
| 155 - missing_kmers: Number of skipped/missing reference positions in nanopolish output | |
| 156 - byte_offset: Number of characters before the start of the sequence in the main output file. This can be used in conjunction with file.seek() to directly access the start of a read. An example is provided in the Usage notebook. | |
| 157 - byte_len: Length of characters after byte_offset to the end of the read, excluding the last newline. This can be used in conjunction with read() to read all the text chunk corresponding to the read. | |
| 158 | |
| 159 .. class:: infomark | |
| 160 | |
| 161 **References** | |
| 162 | |
| 163 @REFERENCES@ | |
| 164 ]]></help> | |
| 165 <expand macro="citations"/> | |
| 166 </tool> |
