Mercurial > repos > peterjc > blast2go
comparison tools/ncbi_blast_plus/blast2go.py @ 1:7b53cc52e7ed
Migrated tool version 0.0.2 from old tool shed archive to new tool shed repository
| author | peterjc |
|---|---|
| date | Tue, 07 Jun 2011 15:51:46 -0400 |
| parents | 4bfd64cf18ab |
| children | 5cb24d07c316 |
comparison
equal
deleted
inserted
replaced
| 0:4bfd64cf18ab | 1:7b53cc52e7ed |
|---|---|
| 4 This script takes exactly three command line arguments: | 4 This script takes exactly three command line arguments: |
| 5 * Input BLAST XML filename | 5 * Input BLAST XML filename |
| 6 * Blast2GO properties filename (settings file) | 6 * Blast2GO properties filename (settings file) |
| 7 * Output tabular filename | 7 * Output tabular filename |
| 8 | 8 |
| 9 Sadly b2g4pipe v2.3.5 cannot cope with current style large BLAST XML | |
| 10 files (e.g. from BLAST 2.2.25+), so we have to reformat these to | |
| 11 avoid it crashing with a Java heap space OutOfMemoryError. | |
| 12 | |
| 13 As part of this reformatting, we check for BLASTP or BLASTX output | |
| 14 (otherwise raise an error), and print the query count. | |
| 15 | |
| 9 It then calls the Java command line tool, and moves the output file to | 16 It then calls the Java command line tool, and moves the output file to |
| 10 the location Galaxy is expecting. | 17 the location Galaxy is expecting, and removes the tempory XML file. |
| 11 """ | 18 """ |
| 12 import sys | 19 import sys |
| 13 import os | 20 import os |
| 14 import subprocess | 21 import subprocess |
| 15 | 22 |
| 25 if len(sys.argv) != 4: | 32 if len(sys.argv) != 4: |
| 26 stop_err("Require three arguments: XML filename, properties filename, output tabular filename") | 33 stop_err("Require three arguments: XML filename, properties filename, output tabular filename") |
| 27 | 34 |
| 28 xml_file, prop_file, tabular_file = sys.argv[1:] | 35 xml_file, prop_file, tabular_file = sys.argv[1:] |
| 29 | 36 |
| 37 #We should have write access here: | |
| 38 tmp_xml_file = tabular_file + ".tmp.xml" | |
| 39 | |
| 30 if not os.path.isfile(xml_file): | 40 if not os.path.isfile(xml_file): |
| 31 stop_err("Input BLAST XML file not found: %s" % xml_file) | 41 stop_err("Input BLAST XML file not found: %s" % xml_file) |
| 32 | 42 |
| 33 if not os.path.isfile(prop_file): | 43 if not os.path.isfile(prop_file): |
| 34 stop_err("Blast2GO configuration file not found: %s" % prop_file) | 44 stop_err("Blast2GO configuration file not found: %s" % prop_file) |
| 45 | |
| 46 def prepare_xml(original_xml, mangled_xml): | |
| 47 """Reformat BLAST XML to suit Blast2GO. | |
| 48 | |
| 49 Blast2GO can't cope with 1000s of <Iteration> tags within a | |
| 50 single <BlastResult> tag, so instead split this into one | |
| 51 full XML record per interation (i.e. per query). This gives | |
| 52 a concatenated XML file mimicing old versions of BLAST. | |
| 53 | |
| 54 This also checks for BLASTP or BLASTX output, and outputs | |
| 55 the number of queries. Galaxy will show this as "info". | |
| 56 """ | |
| 57 in_handle = open(original_xml) | |
| 58 footer = " </BlastOutput_iterations>\n</BlastOutput>\n" | |
| 59 header = "" | |
| 60 while True: | |
| 61 line = in_handle.readline() | |
| 62 if not line: | |
| 63 #No hits? | |
| 64 stop_err("Problem with XML file?") | |
| 65 if line.strip() == "<Iteration>": | |
| 66 break | |
| 67 header += line | |
| 68 | |
| 69 if "<BlastOutput_program>blastx</BlastOutput_program>" in header: | |
| 70 print "BLASTX output identified" | |
| 71 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header: | |
| 72 print "BLASTP output identified" | |
| 73 else: | |
| 74 in_handle.close() | |
| 75 stop_err("Expect BLASTP or BLASTX output") | |
| 76 | |
| 77 out_handle = open(mangled_xml, "w") | |
| 78 out_handle.write(header) | |
| 79 out_handle.write(line) | |
| 80 count = 1 | |
| 81 while True: | |
| 82 line = in_handle.readline() | |
| 83 if not line: | |
| 84 break | |
| 85 elif line.strip() == "<Iteration>": | |
| 86 #Insert footer/header | |
| 87 out_handle.write(footer) | |
| 88 out_handle.write(header) | |
| 89 count += 1 | |
| 90 out_handle.write(line) | |
| 91 | |
| 92 out_handle.close() | |
| 93 in_handle.close() | |
| 94 print "Input has %i queries" % count | |
| 95 | |
| 35 | 96 |
| 36 def run(cmd): | 97 def run(cmd): |
| 37 #Avoid using shell=True when we call subprocess to ensure if the Python | 98 #Avoid using shell=True when we call subprocess to ensure if the Python |
| 38 #script is killed, so too is the child process. | 99 #script is killed, so too is the child process. |
| 39 try: | 100 try: |
| 42 stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 103 stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
| 43 #Use .communicate as can get deadlocks with .wait(), | 104 #Use .communicate as can get deadlocks with .wait(), |
| 44 stdout, stderr = child.communicate() | 105 stdout, stderr = child.communicate() |
| 45 return_code = child.returncode | 106 return_code = child.returncode |
| 46 if return_code: | 107 if return_code: |
| 108 cmd_str = " ".join(cmd) | |
| 47 if stderr and stdout: | 109 if stderr and stdout: |
| 48 stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr)) | 110 stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) |
| 49 else: | 111 else: |
| 50 stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr)) | 112 stop_err("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) |
| 51 #For early diagnostics, | 113 #For early diagnostics, |
| 52 else: | 114 else: |
| 53 print stdout | 115 print stdout |
| 54 print stderr | 116 print stderr |
| 55 | 117 |
| 56 if not os.path.isfile(blast2go_jar): | 118 if not os.path.isfile(blast2go_jar): |
| 57 stop_err("Blast2GO JAR file not found: %s" % blast2go_jar) | 119 stop_err("Blast2GO JAR file not found: %s" % blast2go_jar) |
| 58 | 120 |
| 59 #We will have write access whereever the output should be, | 121 prepare_xml(xml_file, tmp_xml_file) |
| 122 #print "XML file prepared for Blast2GO" | |
| 123 | |
| 124 #We will have write access wherever the output should be, | |
| 60 #so we'll ask Blast2GO to use that as the stem for its output | 125 #so we'll ask Blast2GO to use that as the stem for its output |
| 61 #(it will append .annot to the filename) | 126 #(it will append .annot to the filename) |
| 62 cmd = ["java", "-jar", blast2go_jar, | 127 cmd = ["java", "-jar", blast2go_jar, |
| 63 "-in", xml_file, | 128 "-in", tmp_xml_file, |
| 64 "-prop", prop_file, | 129 "-prop", prop_file, |
| 65 "-out", tabular_file, | 130 "-out", tabular_file, #Used as base name for output files |
| 66 "-a"] | 131 "-a", # Generate *.annot tabular file |
| 132 #"-img", # Generate images, feature not in v2.3.5 | |
| 133 ] | |
| 134 #print " ".join(cmd) | |
| 67 run(cmd) | 135 run(cmd) |
| 136 | |
| 137 #Remove the temp XML file | |
| 138 os.remove(tmp_xml_file) | |
| 68 | 139 |
| 69 out_file = tabular_file + ".annot" | 140 out_file = tabular_file + ".annot" |
| 70 if not os.path.isfile(out_file): | 141 if not os.path.isfile(out_file): |
| 71 stop_err("ERROR - No output annotation file from Blast2GO") | 142 stop_err("ERROR - No output annotation file from Blast2GO") |
| 72 | 143 |
