Mercurial > repos > galaxyp > mzsqlite_psm_align
comparison mzsqlite_psm_align.xml @ 1:4f8cf8fbef57 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mzsqlite_psm_align commit b0c57cac4e558d974a16b14d4498cf8d4ba9e0c7
| author | galaxyp |
|---|---|
| date | Thu, 19 Apr 2018 14:30:28 -0400 |
| parents | f2dc9805107a |
| children |
comparison
equal
deleted
inserted
replaced
| 0:f2dc9805107a | 1:4f8cf8fbef57 |
|---|---|
| 1 <tool id="mzsqlite_psm_align" name="MzSQLite ProBED ProBAM" version="0.1.0"> | 1 <tool id="mzsqlite_psm_align" name="MzSQLite ProBAM ProBED" version="0.1.0"> |
| 2 <description>from mz.sqlite aand genomic mapping</description> | 2 <description>from mz.sqlite and genomic mapping</description> |
| 3 <requirements> | 3 <requirements> |
| 4 <requirement type="package">biopython</requirement> | 4 <requirement type="package">biopython</requirement> |
| 5 <requirement type="package">twobitreader</requirement> | 5 <requirement type="package">twobitreader</requirement> |
| 6 <requirement type="package">pysam</requirement> | 6 <requirement type="package">pysam</requirement> |
| 7 <requirement type="package">gffutils</requirement> | 7 <requirement type="package">gffutils</requirement> |
| 39 #end if | 39 #end if |
| 40 -v | 40 -v |
| 41 '$mzsqlitedb' '$genomicdb' | 41 '$mzsqlitedb' '$genomicdb' |
| 42 ]]></command> | 42 ]]></command> |
| 43 <inputs> | 43 <inputs> |
| 44 <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite databse"/> | 44 <param name="mzsqlitedb" type="data" format="mz.sqlite" label="mz.sqlite database" |
| 45 <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite databse"/> | 45 help="generated from mzIdentML by mz_to_sqlite"/> |
| 46 <param name="genomicdb" type="data" format="sqlite" label="genomic mapping sqlite database" | |
| 47 help="Genomic mapping for the Search proteins in the mzIdentML"/> | |
| 46 <conditional name="ref"> | 48 <conditional name="ref"> |
| 47 <param name="ref_source" type="select" label="Source for Genomic Sequence Data"> | 49 <param name="ref_source" type="select" label="Source for Genomic Sequence Data" |
| 50 help="Used to generate the genomic reference sequence for identified peptides"> | |
| 48 <option value="cached">Locally cached twobit</option> | 51 <option value="cached">Locally cached twobit</option> |
| 49 <option value="history">History dataset twobit</option> | 52 <option value="history">History dataset twobit</option> |
| 50 </param> | 53 </param> |
| 51 <when value="cached"> | 54 <when value="cached"> |
| 52 <param name="ref_loc" type="select" label="Select reference 2bit file"> | 55 <param name="ref_loc" type="select" label="Select reference 2bit file"> |
| 55 </when> | 58 </when> |
| 56 <when value="history"> | 59 <when value="history"> |
| 57 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" /> | 60 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" /> |
| 58 </when> | 61 </when> |
| 59 </conditional> | 62 </conditional> |
| 60 <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true"/> | 63 <param name="gffutilsdb" type="data" format="sqlite" label="gffutils sqlite database" optional="true" |
| 61 <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true"/> | 64 help="Categorizes the peptide by GTF feature for the proBAM XG tag" /> |
| 62 <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true"/> | 65 <param name="readlignments" type="data" format="bam" label="read alignments bam" optional="true" |
| 66 help="Allows proBAM SEQ field to be modified with observed variants"/> | |
| 67 <param name="genomicref" type="text" value="" label="Genome Reference name" optional="true" | |
| 68 help="The genome reference name to use in the proBED genomeReferenceVersion column"/> | |
| 63 <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true"> | 69 <param name="output_formats" type="select" display="checkboxes" label="outputs" multiple="true"> |
| 64 <option value="probam">pro.bam</option> | 70 <option value="probam" selected="true">pro.bam</option> |
| 65 <option value="prosam">pro.sam</option> | 71 <option value="prosam">pro.sam</option> |
| 66 <option value="probed">pro.bed</option> | 72 <option value="probed">pro.bed</option> |
| 67 </param> | 73 </param> |
| 68 </inputs> | 74 </inputs> |
| 69 <outputs> | 75 <outputs> |
| 77 <filter>'probed' in output_formats</filter> | 83 <filter>'probed' in output_formats</filter> |
| 78 </data> | 84 </data> |
| 79 </outputs> | 85 </outputs> |
| 80 <help><![CDATA[ | 86 <help><![CDATA[ |
| 81 | 87 |
| 82 Generates proBAM or proBED feature alignment files for peptides identified from a mass spectrometry protein search analysis. | 88 Generates proBAM_ or proBED_ feature alignment files for peptides identified from a mass spectrometry protein search analysis. |
| 83 | 89 |
| 84 The tool mz_to_sqlite generates the a SQLite database for a mzIdentML file, | 90 |
| 91 The tool mz_to_sqlite_ generates a SQLite database for an mzIdentML file, | |
| 85 along with the fasta search database and the spectrum files used in the search. | 92 along with the fasta search database and the spectrum files used in the search. |
| 93 This mz.sqlite database is used in conjunction with a genomic mapping sqlite database | |
| 94 to generate the proBAM_ or proBED_ feature alignment files. | |
| 86 | 95 |
| 87 The genomic mapping sqlite database has this schema: | 96 The genomic mapping sqlite database has this schema: |
| 97 | |
| 98 :: | |
| 88 | 99 |
| 89 CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */ | 100 CREATE TABLE feature_cds_map ( /* One row for each exon in the search protein */ |
| 90 name TEXT, /* Accession name of search protein in mzIdentML */ | 101 name TEXT, /* Accession name of search protein in mzIdentML */ |
| 91 chrom TEXT, /* Reference genome chromosome for this exon */ | 102 chrom TEXT, /* Reference genome chromosome for this exon */ |
| 92 start INTEGER, /* genomic start of the exon (zero-based like BED) */ | 103 start INTEGER, /* genomic start of the exon (zero-based like BED) */ |
| 94 strand TEXT, /* genomic strand: '+' or '-' */ | 105 strand TEXT, /* genomic strand: '+' or '-' */ |
| 95 cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */ | 106 cds_start INTEGER, /* The CDS coding start for this exon (zero-based) */ |
| 96 cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */ | 107 cds_end INTEGER /* The CDS coding end for this exon (non-inclusive) */ |
| 97 ); | 108 ); |
| 98 | 109 |
| 110 | |
| 99 Example: | 111 Example: |
| 112 | |
| 113 :: | |
| 114 | |
| 100 sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%'; | 115 sqlite> select * from feature_cds_map WHERE name like 'ENSMUSP00000000001%'; |
| 116 name chrom start end strand cds_start cds_end | |
| 101 ENSMUSP00000000001 chr3 108145887 108146005 - 0 118 | 117 ENSMUSP00000000001 chr3 108145887 108146005 - 0 118 |
| 102 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161 | 118 ENSMUSP00000000001 chr3 108123794 108123837 - 118 161 |
| 103 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303 | 119 ENSMUSP00000000001 chr3 108123541 108123683 - 161 303 |
| 104 ENSMUSP00000000001 chr3 108118300 108118458 - 303 461 | 120 ENSMUSP00000000001 chr3 108118300 108118458 - 303 461 |
| 105 ENSMUSP00000000001 chr3 108115762 108115891 - 461 590 | 121 ENSMUSP00000000001 chr3 108115762 108115891 - 461 590 |
| 106 ENSMUSP00000000001 chr3 108112472 108112602 - 590 720 | 122 ENSMUSP00000000001 chr3 108112472 108112602 - 590 720 |
| 107 ENSMUSP00000000001 chr3 108111934 108112088 - 720 874 | 123 ENSMUSP00000000001 chr3 108111934 108112088 - 720 874 |
| 108 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065 | 124 ENSMUSP00000000001 chr3 108109421 108109612 - 874 1065 |
| 109 | 125 |
| 126 Each row represents an exon in the search protein. | |
| 127 The locations: start, end, cds_start, and cds_end are **zero-based** like BED format. | |
| 128 | |
| 129 The **name** field must match the **accession** name used in the mz.sqlite database | |
| 130 and thus the mzIdentML search results file. | |
| 131 | |
| 132 The protein positions are described in CDS base offsets rather than amino acid offsets | |
| 133 to allow for codons being split across exons. | |
| 134 | |
| 110 This schema can describe structural variants as well as canonical transcripts. | 135 This schema can describe structural variants as well as canonical transcripts. |
| 111 | 136 |
| 137 .. _proBAM: http://www.psidev.info/probam | |
| 138 .. _proBED: http://www.psidev.info/probed | |
| 139 .. _mz_to_sqlite: https://toolshed.g2.bx.psu.edu/view/galaxyp/mz_to_sqlite/e34bdac5b157 | |
| 140 | |
| 112 ]]></help> | 141 ]]></help> |
| 142 <citations> | |
| 143 <citation type="doi">10.1186/s13059-017-1377-x</citation> | |
| 144 </citations> | |
| 113 </tool> | 145 </tool> |
