Next changeset 1:4198976f4e5e (2014-03-10) |
Commit message:
Uploaded |
added:
README datatypes_conf.xml display_applications/proteomics/PepXml.xml display_applications/proteomics/ProtGff.xml display_applications/proteomics/ProtXml.xml display_applications/proteomics/mzML.xml proteomics.py tool-data/proteogenomics_display_site.txt.sample tool-data/protk_display_site.txt.sample |
b |
diff -r 000000000000 -r 7101f7e4b00b README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Wed May 08 03:25:50 2013 -0400 |
[ |
@@ -0,0 +1,9 @@ +## What is it? +Galaxy datatype and display-application definitions for proteomics data. + +## Installation +Install into your local Galaxy instance from the main Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/ + +To visualize data, you will need to install the protviz visualization web application. This is available at +[https://bitbucket.org/Andrew_Brock/proteomics-visualise](https://bitbucket.org/Andrew_Brock/proteomics-visualise) + |
b |
diff -r 000000000000 -r 7101f7e4b00b datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Wed May 08 03:25:50 2013 -0400 |
b |
<?xml version="1.0"?>
<!--
  Datatype registration for the proteomics datatypes.
  The datatype classes referenced below are loaded from proteomics.py.
-->
<datatypes>

  <datatype_files>
    <datatype_file name="proteomics.py"/>
  </datatype_files>

  <registration display_path="display_applications">

    <!-- Datatypes that have a display application attached -->
    <datatype extension="prot_gff"
              type="galaxy.datatypes.proteomics:ProtGff"
              mimetype="application/xml"
              display_in_upload="true">
      <display file="proteomics/ProtGff.xml"/>
    </datatype>

    <datatype extension="pepxml"
              type="galaxy.datatypes.proteomics:PepXml"
              mimetype="application/xml"
              display_in_upload="true">
      <display file="proteomics/PepXml.xml"/>
    </datatype>

    <!-- pepXML variants: registered as subclasses of PepXml and sharing its display application -->
    <datatype extension="raw_pepxml"
              type="galaxy.datatypes.proteomics:PepXml"
              subclass="true">
      <display file="proteomics/PepXml.xml"/>
    </datatype>

    <datatype extension="peptideprophet_pepxml"
              type="galaxy.datatypes.proteomics:PepXml"
              subclass="true">
      <display file="proteomics/PepXml.xml"/>
    </datatype>

    <datatype extension="interprophet_pepxml"
              type="galaxy.datatypes.proteomics:PepXml"
              subclass="true">
      <display file="proteomics/PepXml.xml"/>
    </datatype>

    <datatype extension="protxml"
              type="galaxy.datatypes.proteomics:ProtXML"
              display_in_upload="true">
      <display file="proteomics/ProtXml.xml"/>
    </datatype>

    <!-- The only datatype here that is not offered in the upload form -->
    <datatype extension="mascotdat"
              type="galaxy.datatypes.proteomics:MascotDat"
              display_in_upload="false"/>

    <datatype extension="mzml"
              type="galaxy.datatypes.proteomics:MzML"
              mimetype="application/xml"
              display_in_upload="true">
      <display file="proteomics/mzML.xml"/>
    </datatype>

    <!-- Datatypes without a display application -->
    <datatype extension="mgf"
              type="galaxy.datatypes.proteomics:Mgf"
              display_in_upload="true"/>
    <datatype extension="xls"
              type="galaxy.datatypes.proteomics:Xls"
              display_in_upload="true"/>
    <datatype extension="mzxml"
              type="galaxy.datatypes.proteomics:MzXML"
              mimetype="application/xml"
              display_in_upload="true"/>
    <datatype extension="mzq"
              type="galaxy.datatypes.proteomics:MzQuantML"
              mimetype="application/xml"
              display_in_upload="true"/>
    <datatype extension="mzid"
              type="galaxy.datatypes.proteomics:MzIdentML"
              mimetype="application/xml"
              display_in_upload="true"/>
    <datatype extension="traML"
              type="galaxy.datatypes.proteomics:TraML"
              mimetype="application/xml"
              display_in_upload="true"/>
    <datatype extension="raw"
              type="galaxy.datatypes.proteomics:RAW"
              display_in_upload="true"/>
    <datatype extension="msp"
              type="galaxy.datatypes.proteomics:Msp"
              display_in_upload="true"/>
    <datatype extension="ms2"
              type="galaxy.datatypes.proteomics:Ms2"
              display_in_upload="true"/>
    <datatype extension="hlf"
              type="galaxy.datatypes.proteomics:XHunterAslFormat"
              display_in_upload="true"/>

  </registration>

  <!-- Sniffers are listed in their original order; do not reorder without checking sniff precedence -->
  <sniffers>
    <sniffer type="galaxy.datatypes.proteomics:ProtGff"/>
    <sniffer type="galaxy.datatypes.proteomics:MzML"/>
    <sniffer type="galaxy.datatypes.proteomics:PepXml"/>
    <sniffer type="galaxy.datatypes.proteomics:Mgf"/>
    <sniffer type="galaxy.datatypes.proteomics:ProtXML"/>
    <sniffer type="galaxy.datatypes.proteomics:MzXML"/>
    <sniffer type="galaxy.datatypes.proteomics:TraML"/>
    <sniffer type="galaxy.datatypes.proteomics:MzIdentML"/>
    <sniffer type="galaxy.datatypes.proteomics:MzQuantML"/>
    <sniffer type="galaxy.datatypes.proteomics:Xls"/>
  </sniffers>

</datatypes>
b |
diff -r 000000000000 -r 7101f7e4b00b display_applications/proteomics/PepXml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/PepXml.xml Wed May 08 03:25:50 2013 -0400 |
[ |
<display id="proteomics_pepxml" version="1.0.0" name="view pepXML in">
  <!--
    Display application that hands a pepXML dataset to an external viewer.
    One link is generated per entry of tool-data/protk_display_site.txt;
    entries beginning with "#" are skipped (skip_startswith).
  -->
  <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
    <!-- Define parameters by column from file: column 0 is the site id, column 1 is the site URL -->
    <dynamic_param name="site_id" value="0"/>
    <dynamic_param name="site_url" value="1"/>
    <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name.
         The query-string separator must be written as &amp;amp; so that this file stays well-formed XML. -->
    <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=pepxml</url>
    <!-- NOTE(review): format="pepXML" differs in case from the "pepxml" extension registered in
         datatypes_conf.xml; confirm that the format lookup is case-insensitive. -->
    <param type="data" name="pep_file" viewable="False" format="pepXML"/>
    <param type="data" dataset="pep_file" name="pepxml_file" format="pepXML" viewable="False"/>
    <!-- Hex-encoded file name of the dataset, passed to the viewer in the "file" query parameter -->
    <param type="template" name="encoded_filename" strip="True">
      #import binascii
      ${binascii.hexlify( $pepxml_file.file_name )}
    </param>
    <!-- Text between the first and second ":" of BASE_URL with the leading "//" removed.
         Not referenced by the url template above. -->
    <param type="template" name="galaxy_url" strip="True">
      ${BASE_URL.split(":")[1][2:]}
    </param>
  </dynamic_links>
</display>
b |
diff -r 000000000000 -r 7101f7e4b00b display_applications/proteomics/ProtGff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/ProtGff.xml Wed May 08 03:25:50 2013 -0400 |
[ |
<display id="proteomics_gff" version="1.0.0" name="view gff in">
  <!--
    Display application that hands a proteogenomics GFF dataset to an external viewer.
    One link is generated per entry of tool-data/proteogenomics_display_site.txt;
    entries beginning with "#" are skipped (skip_startswith).
  -->
  <dynamic_links from_file="tool-data/proteogenomics_display_site.txt" skip_startswith="#" id="0" name="0">
    <!-- Define parameters by column from file: column 0 is the site id, column 1 is the site URL -->
    <dynamic_param name="site_id" value="0"/>
    <dynamic_param name="site_url" value="1"/>
    <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name.
         The query-string separator must be written as &amp;amp; so that this file stays well-formed XML. -->
    <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=protgff</url>
    <!-- NOTE(review): format="protgff" does not match the "prot_gff" extension registered in
         datatypes_conf.xml; confirm which spelling the format lookup expects. -->
    <param type="data" name="prot_file" viewable="False" format="protgff"/>
    <param type="data" dataset="prot_file" name="protgff_file" format="protgff" viewable="False"/>
    <!-- Hex-encoded file name of the dataset, passed to the viewer in the "file" query parameter -->
    <param type="template" name="encoded_filename" strip="True">
      #import binascii
      ${binascii.hexlify( $protgff_file.file_name )}
    </param>
    <!-- Text between the first and second ":" of BASE_URL with the leading "//" removed.
         Not referenced by the url template above. -->
    <param type="template" name="galaxy_url" strip="True">
      ${BASE_URL.split(":")[1][2:]}
    </param>
  </dynamic_links>
</display>
b |
diff -r 000000000000 -r 7101f7e4b00b display_applications/proteomics/ProtXml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/ProtXml.xml Wed May 08 03:25:50 2013 -0400 |
[ |
<display id="proteomics_protxml" version="1.0.0" name="view protXML in">
  <!--
    Display application that hands a protXML dataset to an external viewer.
    One link is generated per entry of tool-data/protk_display_site.txt;
    entries beginning with "#" are skipped (skip_startswith).
  -->
  <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
    <!-- Define parameters by column from file: column 0 is the site id, column 1 is the site URL -->
    <dynamic_param name="site_id" value="0"/>
    <dynamic_param name="site_url" value="1"/>
    <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name.
         The query-string separator must be written as &amp;amp; so that this file stays well-formed XML. -->
    <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=protxml</url>
    <!-- NOTE(review): format="protXML" differs in case from the "protxml" extension registered in
         datatypes_conf.xml; confirm that the format lookup is case-insensitive. -->
    <param type="data" name="prot_file" viewable="False" format="protXML"/>
    <param type="data" dataset="prot_file" name="protxml_file" format="protXML" viewable="False"/>
    <!-- Hex-encoded file name of the dataset, passed to the viewer in the "file" query parameter -->
    <param type="template" name="encoded_filename" strip="True">
      #import binascii
      ${binascii.hexlify( $protxml_file.file_name )}
    </param>
    <!-- Text between the first and second ":" of BASE_URL with the leading "//" removed.
         Not referenced by the url template above. -->
    <param type="template" name="galaxy_url" strip="True">
      ${BASE_URL.split(":")[1][2:]}
    </param>
  </dynamic_links>
</display>
b |
diff -r 000000000000 -r 7101f7e4b00b display_applications/proteomics/mzML.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/mzML.xml Wed May 08 03:25:50 2013 -0400 |
[ |
<display id="proteomics_mzml" version="1.0.2" name="view mzML data">
  <!--
    Display application that hands an mzML dataset to an external viewer.
    One link is generated per entry of tool-data/protk_display_site.txt;
    entries beginning with "#" are skipped (skip_startswith).
  -->
  <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
    <!-- Define parameters by column from file: column 0 is the site id, column 1 is the site URL -->
    <dynamic_param name="site_id" value="0"/>
    <dynamic_param name="site_url" value="1"/>
    <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name.
         The query-string separator must be written as &amp;amp; so that this file stays well-formed XML. -->
    <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=mzml</url>
    <!-- NOTE(review): format="mzML" differs in case from the "mzml" extension registered in
         datatypes_conf.xml; confirm that the format lookup is case-insensitive. -->
    <param type="data" name="raw_file" viewable="False" format="mzML"/>
    <param type="data" dataset="raw_file" name="mzml_file" format="mzML" viewable="False"/>
    <!-- Hex-encoded file name of the dataset, passed to the viewer in the "file" query parameter -->
    <param type="template" name="encoded_filename" strip="True">
      #import binascii
      ${binascii.hexlify( $mzml_file.file_name )}
    </param>
    <!-- Text between the first and second ":" of BASE_URL with the leading "//" removed.
         Not referenced by the url template above. -->
    <param type="template" name="galaxy_url" strip="True">
      ${BASE_URL.split(":")[1][2:]}
    </param>
  </dynamic_links>
</display>
b |
diff -r 000000000000 -r 7101f7e4b00b proteomics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteomics.py Wed May 08 03:25:50 2013 -0400 |
[ |
b'@@ -0,0 +1,276 @@\n+"""\n+Proteomics format classes\n+"""\n+import logging\n+import re\n+from galaxy.datatypes.data import *\n+from galaxy.datatypes.xml import *\n+from galaxy.datatypes.sniff import *\n+from galaxy.datatypes.binary import *\n+from galaxy.datatypes.interval import *\n+\n+log = logging.getLogger(__name__)\n+\n+class ProtGff( Gff ):\n+ """Tab delimited data in Gff format"""\n+ file_ext = "prot_gff"\n+ def set_peek( self, dataset, is_multi_byte=False ):\n+ """Set the peek and blurb text"""\n+ if not dataset.dataset.purged:\n+ dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )\n+ dataset.blurb = \'Proteogenomics GFF\'\n+ else:\n+ dataset.peek = \'file does not exist\'\n+ dataset.blurb = \'file purged from disk\'\n+\n+ def sniff( self, filename ):\n+ handle = open(filename)\n+ xmlns_re = re.compile("^##gff-version")\n+ for i in range(3):\n+ line = handle.readline()\n+ if xmlns_re.match(line.strip()):\n+ handle.close()\n+ return True\n+\n+ handle.close()\n+ return False\n+\n+\n+class Xls( Binary ):\n+ """Class describing a binary excel spreadsheet file"""\n+ file_ext = "xls"\n+\n+ def set_peek( self, dataset, is_multi_byte=False ):\n+ if not dataset.dataset.purged:\n+ dataset.peek = "Excel Spreadsheet file"\n+ dataset.blurb = data.nice_size( dataset.get_size() )\n+ else:\n+ dataset.peek = \'file does not exist\'\n+ dataset.blurb = \'file purged from disk\'\n+ def display_peek( self, dataset ):\n+ try:\n+ return dataset.peek\n+ except:\n+ return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )\n+\n+class ProteomicsXml(GenericXml):\n+ """ An enhanced XML datatype used to reuse code across several\n+ proteomic/mass-spec datatypes. """\n+\n+ def sniff(self, filename):\n+ """ Determines whether the file is the correct XML type. 
"""\n+ with open(filename, \'r\') as contents: \n+ while True:\n+ line = contents.readline()\n+ if line == None or not line.startswith(\'<?\'):\n+ break\n+ pattern = \'^<(\\w*:)?%s\' % self.root # pattern match <root or <ns:root for any ns string\n+ return line != None and re.match(pattern, line) != None\n+\n+ def set_peek( self, dataset, is_multi_byte=False ):\n+ """Set the peek and blurb text"""\n+ if not dataset.dataset.purged:\n+ dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )\n+ dataset.blurb = self.blurb\n+ else:\n+ dataset.peek = \'file does not exist\'\n+ dataset.blurb = \'file purged from disk\'\n+\n+class PepXml(ProteomicsXml):\n+ """pepXML data"""\n+ file_ext = "pepxml"\n+ blurb = \'pepXML data\'\n+ root = "msms_pipeline_analysis"\n+ \n+\n+class MzML(ProteomicsXml):\n+ """mzML data"""\n+ file_ext = "mzml"\n+ blurb = \'mzML Mass Spectrometry data\'\n+ root = "(mzML|indexedmzML)"\n+\n+\n+class ProtXML(ProteomicsXml):\n+ """protXML data"""\n+ file_ext = "protxml"\n+ blurb = \'prot XML Search Results\'\n+ root = "protein_summary"\n+\n+\n+class MzXML(ProteomicsXml):\n+ """mzXML data"""\n+ file_ext = "mzXML"\n+ blurb = "mzXML Mass Spectrometry data"\n+ root = "mzXML"\n+\n+## PSI datatypes\n+class MzIdentML(ProteomicsXml):\n+ file_ext = "mzid"\n+ blurb = "XML identified peptides and proteins."\n+ root = "MzIdentML"\n+ \n+\n+class TraML(ProteomicsXml):\n+ file_ext = "traML"\n+ blurb = "TraML transition list"\n+ root = "TraML"\n+\n+\n+class MzQuantML(ProteomicsXml):\n+ file_ext = "mzq"\n+ blurb = "XML quantification data"\n+ root = "MzQuantML"\n+\n+ \n+class Mgf( Text ):\n+ """Mascot Generic Format data"""\n+ file_ext = "mgf"\n+\n+ def set_peek( self, dataset, is_multi_byte=False ):\n+ """Set the peek and blurb text"""\n+ '..b' mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"\n+ max_lines=10\n+\n+ for i, line in enumerate( file( filename ) ):\n+ line = line.rstrip( \'\\n\\r\' )\n+ if line==mime_version:\n+ 
return True\n+ if i>max_lines:\n+ return False\n+\n+\n+class RAW( Binary ):\n+ """Class describing a Thermo Finnigan binary RAW file"""\n+ file_ext = "raw"\n+ def sniff( self, filename ):\n+ # Thermo Finnigan RAW format is proprietary and hence not well documented.\n+ # Files start with 2 bytes that seem to differ followed by F\\0i\\0n\\0n\\0i\\0g\\0a\\0n\n+ # This combination represents 17 bytes, but to play safe we read 20 bytes from \n+ # the start of the file.\n+ try:\n+ header = open( filename ).read(20)\n+ hexheader = binascii.b2a_hex( header )\n+ finnigan = binascii.hexlify( \'F\\0i\\0n\\0n\\0i\\0g\\0a\\0n\' )\n+ if hexheader.find(finnigan) != -1:\n+ return True\n+ return False\n+ except:\n+ return False\n+ def set_peek( self, dataset, is_multi_byte=False ):\n+ if not dataset.dataset.purged:\n+ dataset.peek = "Thermo Finnigan RAW file"\n+ dataset.blurb = data.nice_size( dataset.get_size() )\n+ else:\n+ dataset.peek = \'file does not exist\'\n+ dataset.blurb = \'file purged from disk\'\n+ def display_peek( self, dataset ):\n+ try:\n+ return dataset.peek\n+ except:\n+ return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )\n+\n+\n+if hasattr(Binary, \'register_sniffable_binary_format\'):\n+ Binary.register_sniffable_binary_format(\'RAW\', \'RAW\', RAW)\n+\n+\n+class Msp(Text):\n+ """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """\n+ file_ext = "msp"\n+ \n+ @staticmethod\n+ def next_line_starts_with(contents, prefix):\n+ next_line = contents.readline()\n+ return next_line != None and next_line.startswith(prefix)\n+\n+ def sniff(self, filename):\n+ """ Determines whether the file is a NIST MSP output file. 
\n+\n+ >>> fname = get_test_fname(\'test.msp\') \n+ >>> Msp().sniff(fname)\n+ True\n+ >>> fname = get_test_fname(\'test.mzXML\')\n+ >>> Msp().sniff(fname)\n+ False\n+ """\n+ with open(filename, \'r\') as contents:\n+ return Msp.next_line_starts_with(contents, "Name:") and Msp.next_line_starts_with(contents, "MW:")\n+\n+class Ms2(Text):\n+ file_ext = "ms2"\n+ \n+ def sniff(self, filename):\n+ """ Determines whether the file is a valid ms2 file. \n+\n+ >>> fname = get_test_fname(\'test.msp\') \n+ >>> Ms2().sniff(fname)\n+ False\n+ >>> fname = get_test_fname(\'test.ms2\')\n+ >>> Ms2().sniff(fname)\n+ True\n+ """\n+\n+ with open(filename, \'r\') as contents:\n+ header_lines = []\n+ while True:\n+ line = contents.readline()\n+ if line == None or len(line) == 0:\n+ pass\n+ elif line.startswith(\'H\\t\'):\n+ header_lines.append(line)\n+ else:\n+ break\n+ for header_field in [\'CreationDate\', \'Extractor\', \'ExtractorVersion\', \'ExtractorOptions\']:\n+ found_header = False\n+ for header_line in header_lines:\n+ if header_line.startswith(\'H\\t%s\' % (header_field)):\n+ found_header = True\n+ break\n+ if not found_header:\n+ return False\n+\n+ return True\n+\n+# unsniffable binary format, should do something about this\n+class XHunterAslFormat(Binary):\n+ """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """\n+ file_ext = "hlf"\n+\n+\n+if hasattr(Binary, \'register_unsniffable_binary_ext\'):\n+ Binary.register_unsniffable_binary_ext(\'hlf\')\n' |
b |
diff -r 000000000000 -r 7101f7e4b00b tool-data/proteogenomics_display_site.txt.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteogenomics_display_site.txt.sample Wed May 08 03:25:50 2013 -0400 |
b |
@@ -0,0 +1,3 @@ +#Proteomic Visualization application should be hosted on the same server as galaxy +#Entries in this file are of the format "site_id" site_url +Proteogenomics Browser http://127.0.0.1:8600 |
b |
diff -r 000000000000 -r 7101f7e4b00b tool-data/protk_display_site.txt.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/protk_display_site.txt.sample Wed May 08 03:25:50 2013 -0400 |
b |
@@ -0,0 +1,3 @@ +#Proteomic Visualization application should be hosted on the same server as galaxy +#Entries in this file are of the format "site_id" site_url +Proteomics Visualize http://127.0.0.1:8500 |