Mercurial > repos > jjohnson > spectrast
comparison link_scan_datasets.py @ 3:7f02fc51bddf draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit 379705f578f9a0465f497894c7d2b5f68b6a55e6-dirty
| author | jjohnson |
|---|---|
| date | Wed, 25 Jul 2018 10:58:17 -0400 |
| parents | |
| children | 274fdc50169b |
comparison
equal
deleted
inserted
replaced
| 2:e67b0cc10377 | 3:7f02fc51bddf |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 from __future__ import print_function | |
| 4 | |
| 5 import argparse | |
| 6 import difflib | |
| 7 from difflib import SequenceMatcher | |
| 8 import os | |
| 9 import os.path | |
| 10 import sys | |
| 11 import xml.sax | |
| 12 | |
| 13 | |
| 14 | |
class MzidHandler(xml.sax.ContentHandler):
    """SAX handler that records SpectraData and SearchDatabase elements.

    For every ``SpectraData`` / ``SearchDatabase`` start tag the basename of
    its ``location`` attribute is appended to the matching ``*Files`` list
    and its ``name`` attribute (falling back to ``id`` when ``name`` is
    missing or empty) to the matching ``*Names`` list.

    NOTE(review): nothing in this script installs this handler; only
    PepXmlHandler is passed to the SAX parser in ``__main__``.
    """

    def __init__(self):
        # ContentHandler is an old-style class on Python 2, so call the
        # base initialiser explicitly instead of using super().
        xml.sax.ContentHandler.__init__(self)
        self.spectraDataFiles = []
        self.spectraDataNames = []
        self.searchDatabaseFiles = []
        self.searchDatabaseNames = []

    def _record(self, tag, attrs, files, names):
        """Append one element's file basename and label, then report it."""
        element_id = attrs['id']
        path = attrs['location']
        label = attrs.get('name') or element_id
        files.append(os.path.basename(path))
        names.append(label)
        print("%s: %s %s" % (tag, label, path))

    def startElement(self, tag, attrs):
        """Collect the attributes of the two element types of interest."""
        if tag == 'SpectraData':
            self._record(tag, attrs,
                         self.spectraDataFiles, self.spectraDataNames)
        elif tag == 'SearchDatabase':
            self._record(tag, attrs,
                         self.searchDatabaseFiles, self.searchDatabaseNames)


class PepXmlHandler(xml.sax.ContentHandler):
    """SAX handler that records the spectrum file of each msms_run_summary.

    ``spectraDataFiles`` receives the basename of ``base_name + raw_data``
    and ``spectraDataNames`` the basename of ``base_name`` alone.
    """

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.spectraDataFiles = []
        self.spectraDataNames = []

    def startElement(self, tag, attrs):
        """Record the run's spectrum file name; ignore every other tag."""
        if tag != 'msms_run_summary':
            return
        base = attrs['base_name']
        path = '%s%s' % (base, attrs['raw_data'])
        name = os.path.basename(base)
        self.spectraDataFiles.append(os.path.basename(path))
        self.spectraDataNames.append(name)
        print("SpectraData: %s %s" % (name, path))


def _best_match(name, candidates):
    """Return the entry of *candidates* that best matches *name*.

    An exact match wins; otherwise the candidate with the highest
    ``SequenceMatcher`` ratio is returned (the first one on ties).
    Returns ``None`` when *candidates* is empty.
    """
    if not candidates:
        return None
    if name in candidates:
        return name
    return max(candidates,
               key=lambda cand: SequenceMatcher(None, name, cand).ratio())


def __main__(argv=None):
    """Symlink scan files under the names used in the identification files.

    Parses every identification file for ``msms_run_summary`` elements,
    then, for each ``--scan_name``, picks the closest recorded spectrum file
    name and creates a symlink with that name (in the current directory)
    pointing at the corresponding ``--scan_file`` path. A link is only
    created when the scan file exists and the link name is not already taken.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: on argument errors, or when a scan name has to be
            matched but no spectrum file names were found.
    """
    arg_parser = argparse.ArgumentParser(
        description='link spectrum datasets to the name used' +
        ' in the identification dataset')
    arg_parser.add_argument(
        'ident_files', nargs='+',
        help='Pepxml or mzIdentML')
    arg_parser.add_argument(
        '-n', '--scan_name', default=[], action='append',
        help='Name for scan file')
    arg_parser.add_argument(
        '-f', '--scan_file', default=[], action='append',
        help='Path for scan file')
    args = arg_parser.parse_args(argv)

    # NOTE(review): the help text mentions mzIdentML, but only the pepXML
    # handler is installed here - confirm whether MzidHandler should be used.
    handler = PepXmlHandler()
    sax_parser = xml.sax.make_parser()
    sax_parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    sax_parser.setContentHandler(handler)
    for ident in args.ident_files:
        sax_parser.parse(ident)

    spectra_names = handler.spectraDataFiles

    for i, name in enumerate(args.scan_name):
        # Scan names without a matching --scan_file get an empty path,
        # which never exists and therefore is never linked.
        path = args.scan_file[i] if len(args.scan_file) > i else ''
        print("SpectraFile: %s %s" % (name, path))
        iname = _best_match(name, spectra_names)
        if iname is None:
            # Fail with a clear message rather than passing None on to
            # os.path.exists(), which raises an unrelated TypeError.
            sys.exit("best_match: %s no spectrum file names found in "
                     "identification files" % name)
        print("IdentName: %s %s" % (name, iname))
        if not os.path.exists(iname) and os.path.exists(path):
            os.symlink(path, iname)
            print("%s -> %s" % (iname, path))


if __name__ == "__main__":
    __main__()
| 110 |
