Mercurial > repos > galaxyp > mzsqlite_psm_align
comparison mzsqlite_psm_align.xml @ 1:4f8cf8fbef57 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit b0c57cac4e558d974a16b14d4498cf8d4ba9e0c7
author | galaxyp |
---|---|
date | Thu, 19 Apr 2018 14:30:28 -0400 |
parents | f2dc9805107a |
children |
comparison
equal
deleted
inserted
replaced
0:f2dc9805107a | 1:4f8cf8fbef57 |
---|---|
1 <tool id="mzsqlite_psm_align" name="MzSQLite ProBED ProBAM" version="0.1.0"> | 1 <tool id="mzsqlite_psm_align" name="MzSQLite ProBAM ProBED" version="0.1.0"> |
2 <description>from mz.sqlite aand genomic mapping</description> | 2 <description>from mz.sqlite and genomic mapping</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package">biopython</requirement> | 4 <requirement type="package">biopython</requirement> |
5 <requirement type="package">twobitreader</requirement> | 5 <requirement type="package">twobitreader</requirement> |
6 <requirement type="package">pysam</requirement> | 6 <requirement type="package">pysam</requirement> |
7 <requirement type="package">gffutils</requirement> | 7 <requirement type="package">gffutils</requirement> |
39 #end if | 39 #end if |
40 -v | 40 -v |
41 '$mzsqlitedb' '$genomicdb' | 41 '$mzsqlitedb' '$genomicdb' |
42 ]]></command> | 42 ]]></command> |
43 <inputs> | 43 <inputs> |
44 <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite databse"/> | 44 <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database" |
45 <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite databse"/> | 45 help="generated from mzIdentML by mz_to_sqlite"/> |
46 <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite database" | |
47 help="Genomic mapping for the Search proteins in the mzIdentML"/> | |
46 <conditional name="ref"> | 48 <conditional name="ref"> |
47 <param name="ref_source" type="select" label="Source for Genomic Sequence Data"> | 49 <param name="ref_source" type="select" label="Source for Genomic Sequence Data" |
50 help="Used to generate the genomic reference sequence for identified peptides"> | |
48 <option value="cached">Locally cached twobit</option> | 51 <option value="cached">Locally cached twobit</option> |
49 <option value="history">History dataset twobit</option> | 52 <option value="history">History dataset twobit</option> |
50 </param> | 53 </param> |
51 <when value="cached"> | 54 <when value="cached"> |
52 <param name="ref_loc" type="select" label="Select reference 2bit file"> | 55 <param name="ref_loc" type="select" label="Select reference 2bit file"> |
55 </when> | 58 </when> |
56 <when value="history"> | 59 <when value="history"> |
57 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" /> | 60 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" /> |
58 </when> | 61 </when> |
59 </conditional> | 62 </conditional> |
60 <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"/> | 63 <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true" |
61 <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"/> | 64 help="Categorizes the peptide by GTF feature for the proBAM XG tag" /> |
62 <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"/> | 65 <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true" |
66 help="Allows proBAM SEQ field to be modified with observed variants"/> | |
67 <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true" | |
68 help="The genome reference name to use in the proBED genomeReferenceVersion column"/> | |
63 <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true"> | 69 <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true"> |
64 <option value="probam">pro.bam</option> | 70 <option value="probam" selected="true">pro.bam</option> |
65 <option value="prosam">pro.sam</option> | 71 <option value="prosam">pro.sam</option> |
66 <option value="probed">pro.bed</option> | 72 <option value="probed">pro.bed</option> |
67 </param> | 73 </param> |
68 </inputs> | 74 </inputs> |
69 <outputs> | 75 <outputs> |
77 <filter>'probed' in output_formats</filter> | 83 <filter>'probed' in output_formats</filter> |
78 </data> | 84 </data> |
79 </outputs> | 85 </outputs> |
80 <help><![CDATA[ | 86 <help><![CDATA[ |
81 | 87 |
82 Generates proBAM or proBED feature alignment files for peptides identified from a mass spectrometry protein search analysis. | 88 Generates proBAM_ or proBED_ feature alignment files for peptides identified from a mass spectrometry protein search analysis. |
83 | 89 |
84 The tool mz_to_sqlite generates the a SQLite database for a mzIdentML file, | 90 |
91 The tool mz_to_sqlite_ generates a SQLite database for an mzIdentML file, | |
85 along with the fasta search database and the spectrum files used in the search. | 92 along with the fasta search database and the spectrum files used in the search. |
93 This mz.sqlite database is used in conjunction with a genomic mapping sqlite database | |
94 to generate the proBAM_ or proBED_ feature alignment files. | |
86 | 95 |
87 The genomic mapping sqlite database has this schema: | 96 The genomic mapping sqlite database has this schema: |
97 | |
98 :: | |
88 | 99 |
89 CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */ | 100 CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */ |
90 name TEXT, /* Accession name of search protein in mzIdentML */ | 101 name TEXT, /* Accession name of search protein in mzIdentML */ |
91 chrom TEXT, /* Reference genome chromosome for this exon */ | 102 chrom TEXT, /* Reference genome chromosome for this exon */ |
92 start INTEGER, /* genomic start of the exon (zero-based like BED) */ | 103 start INTEGER, /* genomic start of the exon (zero-based like BED) */ |
94 strand TEXT, /* genomic strand: '+' or '-' */ | 105 strand TEXT, /* genomic strand: '+' or '-' */ |
95 cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */ | 106 cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */ |
96 cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */ | 107 cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */ |
97 ); | 108 ); |
98 | 109 |
110 | |
99 Example: | 111 Example: |
112 | |
113 :: | |
114 | |
100 sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%'; | 115 sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%'; |
116 name chrom start end strand cds_start cds_end | |
101 ENSMUSP00000000001 chr3 108145887 108146005 - 0 118 | 117 ENSMUSP00000000001 chr3 108145887 108146005 - 0 118 |
102 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161 | 118 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161 |
103 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303 | 119 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303 |
104 ENSMUSP00000000001 chr3 108118300 108118458 - 303 461 | 120 ENSMUSP00000000001 chr3 108118300 108118458 - 303 461 |
105 ENSMUSP00000000001 chr3 108115762 108115891 - 461 590 | 121 ENSMUSP00000000001 chr3 108115762 108115891 - 461 590 |
106 ENSMUSP00000000001 chr3 108112472 108112602 - 590 720 | 122 ENSMUSP00000000001 chr3 108112472 108112602 - 590 720 |
107 ENSMUSP00000000001 chr3 108111934 108112088 - 720 874 | 123 ENSMUSP00000000001 chr3 108111934 108112088 - 720 874 |
108 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065 | 124 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065 |
109 | 125 |
126 Each row represents an exon in the search protein. | |
127 The locations: start, end, cds_start, and cds_end are **zero-based** like BED format. | |
128 | |
129 The **name** field must match the **accession** name used in the mz.sqlite database | |
130 and thus the mzIdentML search results file. | |
131 | |
132 The protein positions are described in CDS base offsets rather than amino acid offsets | |
133 to allow for codons being split across exons. | |
134 | |
110 This schema can describe structural variants as well as canonical transcripts. | 135 This schema can describe structural variants as well as canonical transcripts. |
111 | 136 |
137 .. _proBAM: http://www.psidev.info/probam | |
138 .. _proBED: http://www.psidev.info/probed | |
139 .. _mz_to_sqlite: https://toolshed.g2.bx.psu.edu/view/galaxyp/mz_to_sqlite/e34bdac5b157 | |
140 | |
112 ]]></help> | 141 ]]></help> |
142 <citations> | |
143 <citation type="doi">10.1186/s13059-017-1377-x</citation> | |
144 </citations> | |
113 </tool> | 145 </tool> |