Mercurial > repos > jjohnson > spectrast
comparison link_scan_datasets.py @ 5:274fdc50169b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit e5b5b15b0a995c8588ff62f92fd0a2329fb7a6a5-dirty
| author | jjohnson |
|---|---|
| date | Wed, 25 Jul 2018 15:05:34 -0400 |
| parents | 7f02fc51bddf |
| children | |
comparison
equal
deleted
inserted
replaced
| 4:c9bfe6adb7cd | 5:274fdc50169b |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 | 2 |
| 3 from __future__ import print_function | 3 from __future__ import print_function |
| 4 | 4 |
| 5 import argparse | 5 import argparse |
| 6 import difflib | |
| 7 from difflib import SequenceMatcher | |
| 8 import os | 6 import os |
| 9 import os.path | 7 import os.path |
| 10 import sys | |
| 11 import xml.sax | 8 import xml.sax |
| 12 | 9 from difflib import SequenceMatcher |
| 13 | 10 |
| 14 | 11 |
| 15 def __main__(): | 12 def __main__(): |
| 16 parser = argparse.ArgumentParser( | 13 parser = argparse.ArgumentParser( |
| 17 description='link spectrum datasets to the name used' + | 14 description='link spectrum datasets to the name used' + |
| 18 ' in the identification dataset') | 15 ' in the identification dataset') |
| 19 parser.add_argument( | 16 parser.add_argument( |
| 20 'ident_files', nargs='+', | 17 'ident_files', nargs='+', |
| 21 help='Pepxml or mzIdentML') | 18 help='Pepxml or mzIdentML') |
| 22 parser.add_argument( | 19 parser.add_argument( |
| 23 '-n', '--scan_name', default=[], action='append', | 20 '-n', '--scan_name', default=[], action='append', |
| 24 help='Name for scan file') | 21 help='Name for scan file') |
| 25 parser.add_argument( | 22 parser.add_argument( |
| 26 '-f', '--scan_file', default=[], action='append', | 23 '-f', '--scan_file', default=[], action='append', |
| 27 help='Path for scan file') | 24 help='Path for scan file') |
| 28 args = parser.parse_args() | 25 args = parser.parse_args() |
| 29 | 26 |
| 30 class MzidHandler( xml.sax.ContentHandler): | 27 class MzidHandler(xml.sax.ContentHandler): |
| 28 | |
| 31 def __init__(self): | 29 def __init__(self): |
| 32 xml.sax.ContentHandler.__init__(self) | 30 xml.sax.ContentHandler.__init__(self) |
| 33 self.spectraDataFiles = [] | 31 self.spectraDataFiles = [] |
| 34 self.spectraDataNames = [] | 32 self.spectraDataNames = [] |
| 35 self.searchDatabaseFiles = [] | 33 self.searchDatabaseFiles = [] |
| 36 self.searchDatabaseNames = [] | 34 self.searchDatabaseNames = [] |
| 35 | |
| 37 def startElement(self, tag, attrs): | 36 def startElement(self, tag, attrs): |
| 38 if tag == 'SpectraData': | 37 if tag == 'SpectraData': |
| 39 id = attrs['id'] | 38 id = attrs['id'] |
| 40 path = attrs['location'] | 39 path = attrs['location'] |
| 41 filename = os.path.basename(path) | 40 filename = os.path.basename(path) |
| 49 filename = os.path.basename(path) | 48 filename = os.path.basename(path) |
| 50 name = attrs['name'] if 'name' in attrs else None | 49 name = attrs['name'] if 'name' in attrs else None |
| 51 self.searchDatabaseFiles.append(filename) | 50 self.searchDatabaseFiles.append(filename) |
| 52 self.searchDatabaseNames.append(name if name else id) | 51 self.searchDatabaseNames.append(name if name else id) |
| 53 print ("SearchDatabase: %s %s" % (name if name else id, path)) | 52 print ("SearchDatabase: %s %s" % (name if name else id, path)) |
| 54 def endElement( self, name): | 53 |
| 55 pass | 54 def endElement(self, name): |
| 56 def characters( self, data): | |
| 57 pass | 55 pass |
| 58 | 56 |
| 59 class PepXmlHandler( xml.sax.ContentHandler): | 57 def characters(self, data): |
| 58 pass | |
| 59 | |
| 60 class PepXmlHandler(xml.sax.ContentHandler): | |
| 61 | |
| 60 def __init__(self): | 62 def __init__(self): |
| 61 xml.sax.ContentHandler.__init__(self) | 63 xml.sax.ContentHandler.__init__(self) |
| 62 self.spectraDataFiles = [] | 64 self.spectraDataFiles = [] |
| 63 self.spectraDataNames = [] | 65 self.spectraDataNames = [] |
| 66 | |
| 64 def startElement(self, tag, attrs): | 67 def startElement(self, tag, attrs): |
| 65 if tag == 'msms_run_summary': | 68 if tag == 'msms_run_summary': |
| 66 basename = attrs['base_name'] | 69 basename = attrs['base_name'] |
| 67 name = os.path.basename(basename) | 70 name = os.path.basename(basename) |
| 68 ext = attrs['raw_data'] | 71 ext = attrs['raw_data'] |
| 69 path = '%s%s' % (basename,ext) | 72 path = '%s%s' % (basename, ext) |
| 70 filename = os.path.basename(path) | 73 filename = os.path.basename(path) |
| 71 self.spectraDataFiles.append(filename) | 74 self.spectraDataFiles.append(filename) |
| 72 self.spectraDataNames.append(name) | 75 self.spectraDataNames.append(name) |
| 73 print ("SpectraData: %s %s" % (name, path)) | 76 print ("SpectraData: %s %s" % (name, path)) |
| 74 def endElement( self, name): | 77 |
| 78 def endElement(self, name): | |
| 75 pass | 79 pass |
| 76 def characters( self, data): | 80 |
| 81 def characters(self, data): | |
| 77 pass | 82 pass |
| 78 | 83 |
| 79 parser = xml.sax.make_parser() | 84 parser = xml.sax.make_parser() |
| 80 parser.setFeature(xml.sax.handler.feature_namespaces, 0) | 85 parser.setFeature(xml.sax.handler.feature_namespaces, 0) |
| 81 handler = PepXmlHandler() | 86 handler = PepXmlHandler() |
| 82 parser.setContentHandler( handler ) | 87 parser.setContentHandler(handler) |
| 83 for ident in args.ident_files: | 88 for ident in args.ident_files: |
| 84 parser.parse(ident) | 89 parser.parse(ident) |
| 85 | 90 |
| 86 spectra_names = handler.spectraDataFiles | 91 spectra_names = handler.spectraDataFiles |
| 87 | 92 |
| 88 def best_match(name): | 93 def best_match(name): |
| 89 if name in spectra_names: | 94 if name in spectra_names: |
| 90 return name | 95 return name |
| 91 try: | 96 try: |
| 92 r = [SequenceMatcher(None, name, spectra_names[x]).ratio() for x in range(len(spectra_names))] | 97 r = [SequenceMatcher(None, name, spectra_names[x]).ratio() |
| 98 for x in range(len(spectra_names))] | |
| 93 return spectra_names[r.index(max(r))] | 99 return spectra_names[r.index(max(r))] |
| 94 except Exception, e: | 100 except Exception, e: |
| 95 print ("best_match: %s %s" % (name, e)) | 101 print ("best_match: %s %s" % (name, e)) |
| 96 | 102 |
| 97 for i,name in enumerate(args.scan_name): | 103 for i, name in enumerate(args.scan_name): |
| 98 path = args.scan_file[i] if len(args.scan_file) > i else '' | 104 path = args.scan_file[i] if len(args.scan_file) > i else '' |
| 99 (root, ext) = os.path.splitext(name) | 105 (root, ext) = os.path.splitext(name) |
| 100 print ("SpectraFile: %s %s" % (name, path)) | 106 print ("SpectraFile: %s %s" % (name, path)) |
| 101 iname = best_match(name) | 107 iname = best_match(name) |
| 102 print ("IdentName: %s %s" % (name, iname)) | 108 print ("IdentName: %s %s" % (name, iname)) |
| 105 print ("%s -> %s" % (iname, path)) | 111 print ("%s -> %s" % (iname, path)) |
| 106 | 112 |
| 107 | 113 |
| 108 if __name__ == "__main__": | 114 if __name__ == "__main__": |
| 109 __main__() | 115 __main__() |
| 110 |
