comparison mzsqlite_psm_align.xml @ 1:4f8cf8fbef57 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit b0c57cac4e558d974a16b14d4498cf8d4ba9e0c7
author galaxyp
date Thu, 19 Apr 2018 14:30:28 -0400
parents f2dc9805107a
children
comparison
equal deleted inserted replaced
0:f2dc9805107a 1:4f8cf8fbef57
1 <tool id="mzsqlite_psm_align" name="MzSQLite ProBED ProBAM" version="0.1.0"> 1 <tool id="mzsqlite_psm_align" name="MzSQLite ProBAM ProBED" version="0.1.0">
2 <description>from mz.sqlite aand genomic mapping</description> 2 <description>from mz.sqlite and genomic mapping</description>
3 <requirements> 3 <requirements>
4 <requirement type="package">biopython</requirement> 4 <requirement type="package">biopython</requirement>
5 <requirement type="package">twobitreader</requirement> 5 <requirement type="package">twobitreader</requirement>
6 <requirement type="package">pysam</requirement> 6 <requirement type="package">pysam</requirement>
7 <requirement type="package">gffutils</requirement> 7 <requirement type="package">gffutils</requirement>
39 #end if 39 #end if
40 -v 40 -v
41 '$mzsqlitedb' '$genomicdb' 41 '$mzsqlitedb' '$genomicdb'
42 ]]></command> 42 ]]></command>
43 <inputs> 43 <inputs>
44 <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite databse"/> 44 <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database"
45 <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite databse"/> 45 help="generated from mzIdentML by mz_to_sqlite"/>
46 <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite database"
47 help="Genomic mapping for the Search proteins in the mzIdentML"/>
46 <conditional name="ref"> 48 <conditional name="ref">
47 <param name="ref_source" type="select" label="Source for Genomic Sequence Data"> 49 <param name="ref_source" type="select" label="Source for Genomic Sequence Data"
50 help="Used to generate the genomic reference sequence for identified peptides">
48 <option value="cached">Locally cached twobit</option> 51 <option value="cached">Locally cached twobit</option>
49 <option value="history">History dataset twobit</option> 52 <option value="history">History dataset twobit</option>
50 </param> 53 </param>
51 <when value="cached"> 54 <when value="cached">
52 <param name="ref_loc" type="select" label="Select reference 2bit file"> 55 <param name="ref_loc" type="select" label="Select reference 2bit file">
55 </when> 58 </when>
56 <when value="history"> 59 <when value="history">
57 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" /> 60 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
58 </when> 61 </when>
59 </conditional> 62 </conditional>
60 <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"/> 63 <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"
61 <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"/> 64 help="Categorizes the peptide by GTF feature for the proBAM XG tag" />
62 <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"/> 65 <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"
66 help="Allows proBAM SEQ field to be modified with observed variants"/>
67 <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"
68 help="The genome reference name to use in the proBED genomeReferenceVersion column"/>
63 <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true"> 69 <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true">
64 <option value="probam">pro.bam</option> 70 <option value="probam" selected="true">pro.bam</option>
65 <option value="prosam">pro.sam</option> 71 <option value="prosam">pro.sam</option>
66 <option value="probed">pro.bed</option> 72 <option value="probed">pro.bed</option>
67 </param> 73 </param>
68 </inputs> 74 </inputs>
69 <outputs> 75 <outputs>
77 <filter>'probed' in output_formats</filter> 83 <filter>'probed' in output_formats</filter>
78 </data> 84 </data>
79 </outputs> 85 </outputs>
80 <help><![CDATA[ 86 <help><![CDATA[
81 87
82 Generates proBAM or proBED feature alignment files for peptides identified from a mass spectrometry protein search analysis. 88 Generates proBAM_ or proBED_ feature alignment files for peptides identified from a mass spectrometry protein search analysis.
83 89
84 The tool mz_to_sqlite generates the a SQLite database for a mzIdentML file, 90
91 The tool mz_to_sqlite_ generates a SQLite database for an mzIdentML file,
85 along with the fasta search database and the spectrum files used in the search. 92 along with the fasta search database and the spectrum files used in the search.
93 This mz.sqlite database is used in conjunction with a genomic mapping sqlite database
94 to generate the proBAM_ or proBED_ feature alignment files.
86 95
87 The genomic mapping sqlite database has this schema: 96 The genomic mapping sqlite database has this schema:
97
98 ::
88 99
89 CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */ 100 CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */
90 name TEXT, /* Accession name of search protein in mzIdentML */ 101 name TEXT, /* Accession name of search protein in mzIdentML */
91 chrom TEXT, /* Reference genome chromosome for this exon */ 102 chrom TEXT, /* Reference genome chromosome for this exon */
92 start INTEGER, /* genomic start of the exon (zero-based like BED) */ 103 start INTEGER, /* genomic start of the exon (zero-based like BED) */
94 strand TEXT, /* genomic strand: '+' or '-' */ 105 strand TEXT, /* genomic strand: '+' or '-' */
95 cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */ 106 cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */
96 cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */ 107 cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */
97 ); 108 );
98 109
110
99 Example: 111 Example:
112
113 ::
114
100 sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%'; 115 sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%';
116 name chrom start end strand cds_start cds_end
101 ENSMUSP00000000001 chr3 108145887 108146005 - 0 118 117 ENSMUSP00000000001 chr3 108145887 108146005 - 0 118
102 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161 118 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161
103 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303 119 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303
104 ENSMUSP00000000001 chr3 108118300 108118458 - 303 461 120 ENSMUSP00000000001 chr3 108118300 108118458 - 303 461
105 ENSMUSP00000000001 chr3 108115762 108115891 - 461 590 121 ENSMUSP00000000001 chr3 108115762 108115891 - 461 590
106 ENSMUSP00000000001 chr3 108112472 108112602 - 590 720 122 ENSMUSP00000000001 chr3 108112472 108112602 - 590 720
107 ENSMUSP00000000001 chr3 108111934 108112088 - 720 874 123 ENSMUSP00000000001 chr3 108111934 108112088 - 720 874
108 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065 124 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065
109 125
126 Each row represents an exon in the search protein.
127 The locations: start, end, cds_start, and cds_end are **zero-based** like BED format.
128
129 The **name** field must match the **accession** name used in the mz.sqlite database
130 and thus the mzIdentML search results file.
131
132 The protein positions are described in CDS base offsets rather than amino acid offsets
133 to allow for codons being split across exons.
134
110 This schema can describe structural variants as well as canonical transcripts. 135 This schema can describe structural variants as well as canonical transcripts.
111 136
137 .. _proBAM: http://www.psidev.info/probam
138 .. _proBED: http://www.psidev.info/probed
139 .. _mz_to_sqlite: https://toolshed.g2.bx.psu.edu/view/galaxyp/mz_to_sqlite/e34bdac5b157
140
112 ]]></help> 141 ]]></help>
142 <citations>
143 <citation type="doi">10.1186/s13059-017-1377-x</citation>
144 </citations>
113 </tool> 145 </tool>