Mercurial > repos > greg > gregs_test_repo
changeset 9:57045525eba1
Deleted selected files
author | greg |
---|---|
date | Thu, 21 Jul 2011 09:53:43 -0400 |
parents | d76734cdef77 |
children | 09951b7d29a4 |
files | blast2go-7b53cc52e7ed/.hg_archival.txt blast2go-7b53cc52e7ed/tools/ncbi_blast_plus/blast2go.loc.sample blast2go-7b53cc52e7ed/tools/ncbi_blast_plus/blast2go.py blast2go-7b53cc52e7ed/tools/ncbi_blast_plus/blast2go.txt blast2go-7b53cc52e7ed/tools/ncbi_blast_plus/blast2go.xml |
diffstat | 4 files changed, 0 insertions(+), 369 deletions(-) [+] |
line wrap: on
line diff
--- a/blast2go-7b53cc52e7ed/.hg_archival.txt Tue Jul 19 13:54:17 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -repo: 4bfd64cf18ab5d0fe74e14afdb6634d8a5f9abb2 -node: 7b53cc52e7eda18a49312bfab66a962d4b6ada71 -branch: default -latesttag: null -latesttagdistance: 2
#!/usr/bin/env python
#File: tools/ncbi_blast_plus/blast2go.py
"""Galaxy wrapper for Blast2GO for pipelines, b2g4pipe v2.3.5.

This script takes exactly three command line arguments:
 * Input BLAST XML filename
 * Blast2GO properties filename (settings file)
 * Output tabular filename

Sadly b2g4pipe v2.3.5 cannot cope with current style large BLAST XML
files (e.g. from BLAST 2.2.25+), so we have to reformat these to
avoid it crashing with a Java heap space OutOfMemoryError.

As part of this reformatting, we check for BLASTP or BLASTX output
(otherwise raise an error), and print the query count.

It then calls the Java command line tool, and moves the output file to
the location Galaxy is expecting, and removes the temporary XML file
(also when the Java tool fails).
"""
import sys
import os
import subprocess

#You may need to edit this to match your local setup,
blast2go_jar = "/opt/b2g4pipe/blast2go.jar"


def stop_err(msg, error_level=1):
    """Print error message to stderr and quit with given error level.

    Raises SystemExit (via sys.exit) carrying error_level as exit code.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(error_level)


def prepare_xml(original_xml, mangled_xml):
    """Reformat BLAST XML to suit Blast2GO.

    Blast2GO can't cope with 1000s of <Iteration> tags within a
    single <BlastResult> tag, so instead split this into one
    full XML record per iteration (i.e. per query). This gives
    a concatenated XML file mimicking old versions of BLAST.

    This also checks for BLASTP or BLASTX output, and outputs
    the number of queries. Galaxy will show this as "info".

    Arguments:
     * original_xml - filename of the BLAST XML file to read
     * mangled_xml - filename to write the reformatted XML to

    Exits via stop_err if no <Iteration> line is found, or if the
    header does not identify the program as blastp or blastx. The
    output file is only created once both checks have passed.
    """
    footer = " </BlastOutput_iterations>\n</BlastOutput>\n"
    #The with statements close both handles on every exit path,
    #including the sys.exit raised by stop_err.
    with open(original_xml) as in_handle:
        #Everything before the first <Iteration> line is the header,
        #which gets repeated in front of every later iteration.
        header_lines = []
        line = ""
        for line in in_handle:
            if line.strip() == "<Iteration>":
                break
            header_lines.append(line)
        else:
            #Ran out of input without seeing any <Iteration> (no hits?)
            stop_err("Problem with XML file?")
        header = "".join(header_lines)

        if "<BlastOutput_program>blastx</BlastOutput_program>" in header:
            print("BLASTX output identified")
        elif "<BlastOutput_program>blastp</BlastOutput_program>" in header:
            print("BLASTP output identified")
        else:
            stop_err("Expect BLASTP or BLASTX output")

        count = 1
        with open(mangled_xml, "w") as out_handle:
            out_handle.write(header)
            out_handle.write(line)  # the first <Iteration> line
            #Carry on reading from where the header scan stopped
            for line in in_handle:
                if line.strip() == "<Iteration>":
                    #Insert footer/header to start a new XML record
                    out_handle.write(footer)
                    out_handle.write(header)
                    count += 1
                out_handle.write(line)
    print("Input has %i queries" % count)


def run(cmd):
    """Run the command (a list of arguments) and echo its output.

    On success the child's stdout and stderr are printed (for early
    diagnostics). If the command cannot be started, or it finishes with
    a non-zero return code, this exits via stop_err with the command
    line and whatever output the child produced.
    """
    #Avoid using shell=True when we call subprocess to ensure if the Python
    #script is killed, so too is the child process.
    try:
        #universal_newlines=True gives text rather than bytes on Python 3
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
    except Exception as err:
        stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    #Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        cmd_str = " ".join(cmd)
        if stderr and stdout:
            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s"
                     % (return_code, cmd_str, stdout, stderr))
        else:
            #Report whichever stream has content, so a message written
            #only to stdout is not lost.
            stop_err("Return code %i from command:\n%s\n%s"
                     % (return_code, cmd_str, stderr or stdout))
    #For early diagnostics,
    print(stdout)
    print(stderr)


def main():
    """Check the arguments, reformat the XML, run Blast2GO, move the output."""
    if len(sys.argv) != 4:
        stop_err("Require three arguments: XML filename, properties filename, output tabular filename")

    xml_file, prop_file, tabular_file = sys.argv[1:]

    #We should have write access here:
    tmp_xml_file = tabular_file + ".tmp.xml"

    if not os.path.isfile(xml_file):
        stop_err("Input BLAST XML file not found: %s" % xml_file)

    if not os.path.isfile(prop_file):
        stop_err("Blast2GO configuration file not found: %s" % prop_file)

    if not os.path.isfile(blast2go_jar):
        stop_err("Blast2GO JAR file not found: %s" % blast2go_jar)

    try:
        prepare_xml(xml_file, tmp_xml_file)

        #We will have write access wherever the output should be,
        #so we'll ask Blast2GO to use that as the stem for its output
        #(it will append .annot to the filename)
        cmd = ["java", "-jar", blast2go_jar,
               "-in", tmp_xml_file,
               "-prop", prop_file,
               "-out", tabular_file,  # Used as base name for output files
               "-a",  # Generate *.annot tabular file
               #"-img", # Generate images, feature not in v2.3.5
               ]
        run(cmd)
    finally:
        #Remove the temp XML file, even if prepare_xml or run bailed out
        #via stop_err (SystemExit still passes through this block).
        if os.path.isfile(tmp_xml_file):
            os.remove(tmp_xml_file)

    out_file = tabular_file + ".annot"
    if not os.path.isfile(out_file):
        stop_err("ERROR - No output annotation file from Blast2GO")

    #Move the output file where Galaxy expects it to be:
    os.rename(out_file, tabular_file)

    print("Done")


if __name__ == "__main__":
    main()
--- a/blast2go-7b53cc52e7ed/tools/ncbi_blast_plus/blast2go.txt Tue Jul 19 13:54:17 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ -Galaxy wrapper for Blast2GO for pipelines, b2g4pipe -=================================================== - -This wrapper is copyright 2011 by Peter Cock, The James Hutton Institute -(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. -See the licence text below. - -This is a wrapper for the command line Java tool b2g4pipe v2.3.5, -Blast2GO for pipelines. See: - -S. Götz et al. -High-throughput functional annotation and data mining with the Blast2GO suite. -Nucleic Acids Res. 36(10):3420–3435, 2008. -http://dx.doi.org/10.1093/nar/gkn176 - -A. Conesa and S. Götz. -Blast2GO: A Comprehensive Suite for Functional Analysis in Plant Genomics. -Int. J. Plant Genomics. 619832, 2008. -http://dx.doi.org/10.1155/2008/619832 - -A. Conesa et al. -Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research. -Bioinformatics 21:3674-3676, 2005. -http://dx.doi.org/10.1093/bioinformatics/bti610 - -http://www.blast2go.org/ - - - -Installation -============ - -You can change the path by editing the definition near the start of the Python -script blast2go.py, but by default it expects the underlying tool to be here: - -/opt/b2g4pipe/blast2go.jar - -To install the wrapper copy or move the following files under the Galaxy tools -folder, e.g. in the tools/ncbi_blast_plus folder: - -* blast2go.xml (the Galaxy tool definition) -* blast2go.py (the Python wrapper script) -* blast2go.txt (this README file) - -You will also need to modify the tool_conf.xml file to tell Galaxy to offer the -tool. We suggest putting it next to the NCBI BLAST+ wrappers. Just add the line: - -<tool file="ncbi_blast_plus/blast2go.xml" /> - -As part of setting up b2g4pipe you will need to set up one or more Blast2GO -property files which tell the tool which database to use etc. 
The example -b2gPipe.properties provided with b2g4pipe v2.3.5 is out of date, with the -latest server IP address and database name given on the Blast2GO website. -These files can be anywhere accessible to the Galaxy Unix user; we put them -under /opt/b2g4pipe with the JAR file etc. - -You must tell Galaxy about these Blast2GO property files so that they can be -offered to the user. Create the file tool-data/blast2go.loc under the Galaxy -folder. This must be plain text, tab separated, with three columns: - -(1) ID for the setup, e.g. Spain_2010_May -(2) Description for the setup, e.g. Database in Spain (May 2010) -(3) Properties filename for the setup, e.g. /opt/b2g4pipe/Spain_2010_May.properties - -Avoid including "Blast2GO" in the description (column 2) as this will be -included in the automatically assigned output dataset name. The blast2go.loc -file allows you to customise the database setup. If for example you have a local -Blast2GO server running (which we recommend for speed), and you want this to be -the default setting, include it as the first line in your blast2go.loc file. - -Consult the Blast2GO documentation for details about the property files and -setting up a local MySQL Blast2GO database. - - -History -======= - -v0.0.1 - Initial public release -v0.0.2 - Documentation clarifications, e.g. concatenated BLAST XML is allowed. - - Fixed error handler in wrapper script (for when b2g4pipe fails). - - Reformats the XML to use old NCBI-style concatenated BLAST XML since - b2g4pipe crashes with a heap space error on large files using - current NCBI output. 
- - -Developers -========== - -This script and related tools are being developed on the following hg branch: -http://bitbucket.org/peterjc/galaxy-central/src/tools - -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use -the following command from the Galaxy root folder: - -$ tar -czf blast2go.tar.gz tools/ncbi_blast_plus/blast2go.xml tools/ncbi_blast_plus/blast2go.py tools/ncbi_blast_plus/blast2go.txt - -Check this worked: - -$ tar -tzf blast2go.tar.gz -tools/ncbi_blast_plus/blast2go.xml -tools/ncbi_blast_plus/blast2go.py -tools/ncbi_blast_plus/blast2go.txt - - -Licence (MIT/BSD style) -======================= - -Permission to use, copy, modify, and distribute this software and its -documentation with or without modifications and for any purpose and -without fee is hereby granted, provided that any copyright notices -appear in all copies and that both those copyright notices and this -permission notice appear in supporting documentation, and that the -names of the contributors or copyright holders not be used in -advertising or publicity pertaining to distribution of the software -without specific prior permission. - -THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT -OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE. - -NOTE: This is the licence for the Galaxy Wrapper only. Blast2GO and -associated data files are available and licenced separately.
--- a/blast2go-7b53cc52e7ed/tools/ncbi_blast_plus/blast2go.xml Tue Jul 19 13:54:17 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -<tool id="blast2go" name="Blast2GO" version="0.0.2"> - <description>Maps BLAST results to GO annotation terms</description> - <command interpreter="python"> - blast2go.py $xml ${prop.fields.path} $tab - </command> - <inputs> - <param name="xml" type="data" format="blastxml" label="BLAST XML results" description="You must have run BLAST against a protein database such as the NCBI non-redundant (NR) database. Use BLASTX for nucleotide queries, BLASTP for protein queries." /> - <param name="prop" type="select" label="Blast2GO settings" description="One or more configurations can be setup, such as using the Blast2GO team's server in Spain, or a local database."> - <options from_file="blast2go.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - </inputs> - <outputs> - <data name="tab" format="tabular" label="Blast2GO ${prop.fields.name}" /> - </outputs> - <requirements> - </requirements> - <tests> - </tests> - <help> -.. class:: warningmark - -**Note**. Blast2GO may take a substantial amount of time, especially if -running against the public server in Spain. For large input datasets it -is advisable to allow overnight processing, or consider subdividing. - ------ - -**What it does** - -This runs b2g4Pipe, the command line (no GUI) version of Blast2GO designed -for use in pipelines. - -It takes as input BLAST XML results against a protein database, typically -the NCBI non-redundant (NR) database. This tool will accept concatenated -BLAST XML files (although they are technically invalid XML), which is very -useful if you have sub-divided your protein FASTA files and run BLAST on -them in batches. - -The BLAST matches are used to assign Gene Ontology (GO) annotation terms -to each query sequence. 
- -The output from this tool is a tabular file containing three columns, with -the order taken from query order in the original BLAST XML file: - -====== ==================================== -Column Description ------- ------------------------------------ - 1 ID and description of query sequence - 2 GO term - 3 GO description -====== ==================================== - -Note that if no GO terms are assigned to a sequence (e.g. if it had no -BLAST matches), then it will not be present in the output file. - - -**Advanced Settings** - -Blast2GO has a properties setting file which includes which database -server to connect to (e.g. the public server in Valencia, Spain, or a -local server), as well as more advanced options such as thresholds and -evidence code weights. To change these settings, your Galaxy administrator -must create a new properties file, and add it to the drop down menu above. - - -**References** - -S. Götz et al. -High-throughput functional annotation and data mining with the Blast2GO suite. -Nucleic Acids Res. 36(10):3420–3435, 2008. -http://dx.doi.org/10.1093/nar/gkn176 - -A. Conesa and S. Götz. -Blast2GO: A Comprehensive Suite for Functional Analysis in Plant Genomics. -Int. J. Plant Genomics. 619832, 2008. -http://dx.doi.org/10.1155/2008/619832 - -A. Conesa et al. -Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research. -Bioinformatics 21:3674-3676, 2005. -http://dx.doi.org/10.1093/bioinformatics/bti610 - -http://www.blast2go.org/ - - </help> -</tool>