# HG changeset patch # User iracooke # Date 1370496780 14400 # Node ID adf8ac7aa6c85315b1fb80aad49f7497b5c9dfe4 Uploaded diff -r 000000000000 -r adf8ac7aa6c8 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,5 @@ +This package installs wrapper scripts for Peptide Prophet, iProphet and Protein Prophet + +Requirements: +This package depends on the galaxy_protk and protk_trans_proteomic_pipeline packages +Please see instructions for those packages before installing diff -r 000000000000 -r adf8ac7aa6c8 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,8 @@ +## What is it? +Galaxy tool definition files and wrapper scripts for Peptide and Protein inference tools in the [Trans Proteomic Pipeline](http://tools.proteomecenter.org/wiki/index.php?title=Software:TPP) (Peptide Prophet, iProphet and Protein Prophet). + +## Installation +Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/ + +All the tools depend on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). + diff -r 000000000000 -r adf8ac7aa6c8 interprophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet.xml Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,61 @@ + + + + galaxy_protk + trans_proteomic_pipeline + + + + Combine Peptide Prophet results from multiple search engines + + + + interprophet_wrapper.rb $output $use_nss $use_nrs $use_nse $use_nsi $use_nsm --minprob $minprob + + + ## Inputs. + ${first_input} + #for $input_file in $input_files: + ${input_file.additional_input} + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Takes a set of pepXML files (possibly generated using different search engines) and calculates updated identification probabilities for each peptide. 
The updated probabilities are based on a statistical model that combines evidence from identifications across all of the input files, spectra, modified states and charge states. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing iProphet + +Shteynberg D, et al. “iProphet: Improved statistical validation of peptide identifications in shotgun proteomics.” *Molecular and Cellular Proteomics* 10, M111.007690 (2011). + + + + diff -r 000000000000 -r adf8ac7aa6c8 interprophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet_wrapper.rb Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,56 @@ +require 'pathname' + +$VERBOSE=nil + +# Hard-Coded argument order and number of arguments +# ARGV[6] is the literal "--minprob" flag and ARGV[7] its value, so the input files start at ARGV[8]. NOTE(review): minprob/minprob_val are read here but not forwarded to interprophet.rb - confirm whether they should be. +actual_output_path_string=ARGV[0] +use_nss=ARGV[1] +use_nrs=ARGV[2] +use_nse=ARGV[3] +use_nsi=ARGV[4] +use_nsm=ARGV[5] +minprob=ARGV[6] +minprob_val=ARGV[7] + +wd= Dir.pwd +original_input_files=ARGV.drop(8) +# End hard coded args # + +cmd="" + +output_substitution_cmds="" + +input_files=original_input_files.collect do |input| + + # We append ".pep.xml" to the input file name because interprophet can't handle anything else + # In order for this to work properly we need to create a symbolic link in our working directory + # + original_input_path=Pathname.new("#{input}") + actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" + + cmd << "ln -s #{input} #{actual_input_path_string};" + output_substitution_cmds << "ruby -pi -e \"gsub('#{actual_input_path_string}', '#{input}.pep.xml')\" interprophet_output.pep.xml;" + actual_input_path_string +end + +cmd << "rvm 1.9.3@protk-1.2.0 do interprophet.rb" + +cmd << " --no-nss" unless use_nss=="blank" +cmd << " --no-nrs" unless use_nrs=="blank" +cmd << " --no-nse" unless use_nse=="blank" +cmd << " --no-nsi" unless use_nsi=="blank" +cmd << " --no-nsm" unless use_nsm=="blank" + + +input_files.each { |input| + cmd << " #{input}" +} + + +cmd << " -o interprophet_output.pep.xml -r" + +cmd 
<< ";#{output_substitution_cmds}" + +%x[#{cmd}] + diff -r 000000000000 -r adf8ac7aa6c8 peptide_prophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet.xml Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,83 @@ + + + galaxy_protk + trans_proteomic_pipeline + + + Calculate Peptide Prophet statistics on search results + + peptide_prophet_wrapper.rb ${output} ${input_file} -r $glyco $useicat $phospho $usepi $usert $accurate_mass $no_ntt $no_nmc $use_gamma $use_only_expect $force_fit $allow_alt_instruments $maldi + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Given raw search engine scores as inputs this tool estimates the accuracy of peptide assignments. From a practical perspective it estimates the probability that each peptide assignment is correct (providing probabilities as outputs), given raw scores (possibly on some arbitrary scale) as inputs. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing the statistical model implemented by Peptide Prophet + +Keller A., et al. “Empirical Statistical Model to Estimate the Accuracy of Peptide Identifications Made by MS/MS and Database Search” *Anal. Chem.* 74, 5383-5392 (2002). + + + + + + + + diff -r 000000000000 -r adf8ac7aa6c8 peptide_prophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet_wrapper.rb Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,36 @@ +require 'pathname' + +$VERBOSE=nil + +actual_output_path_string=ARGV.shift + +# Second argument is the original input file name ... 
we'll change this below +original_input_file=ARGV[0] + +# Before doing anything we create a link to the input file in our working dir with ".pep.xml" appended to the input +# name because peptide prophet can't handle anything else + +wd= Dir.pwd + +original_input_path=Pathname.new("#{original_input_file}") +actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" +full_tmp_output_path_string="#{wd}/peptide_prophet_output.pep.xml" + +cmd = "ln -s #{original_input_file} #{actual_input_path_string};" + +cmd << "rvm 1.9.3@protk-1.2.0 do peptide_prophet.rb" + + +ARGV[0]="#{actual_input_path_string}" + +ARGV.each { |a| + cmd << " #{a}" +} + +cmd << " -o peptide_prophet_output.pep.xml" + +# Finally we need to fix up the output file so any references to the temporary working file are changed to refs to the original input file +cmd << ";ruby -pi -e \"gsub('#{actual_input_path_string}', '#{original_input_file}')\" peptide_prophet_output.pep.xml" +cmd << ";ruby -pi -e \"gsub('#{full_tmp_output_path_string}', '#{actual_output_path_string}')\" peptide_prophet_output.pep.xml" + +p %x[#{cmd}] diff -r 000000000000 -r adf8ac7aa6c8 pepxml_to_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pepxml_to_table.xml Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,29 @@ + + + + galaxy_protk + + + + + Converts a pepXML file to a tab delimited text file + + + +rvm 1.9.3@protk-1.2.0 do pepxml_to_table.rb $input_file -o $output + + + + + + + + + + + + + Convert a pepXML file to Tab delimited text + + + diff -r 000000000000 -r adf8ac7aa6c8 protein_prophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet.xml Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,71 @@ + + + galaxy_protk + trans_proteomic_pipeline + + + Calculate Protein Prophet statistics on search results + + + + protein_prophet_wrapper.rb --galaxy $input_file -r $iproph $nooccam $groupwts $normprotlen $logprobs $confem $allpeps $unmapped $instances $delude --minprob=$minprob 
--minindep=$minindep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Given a set of peptide assignments from MS/MS spectra in the form of a pepXML file, this tool estimates probabilities at the protein level. As output, the tool produces a protXML file, which contains proteins along with the estimated probabilities that those proteins were present. Probabilities are estimated using a statistical model based on the number of peptides corresponding to that protein and the confidence that each of those peptides were assigned correctly. It takes account of the fact that peptides may correspond to more than one protein. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing the statistical model implemented by Protein Prophet + +Nesvizhskii A., et al. “A Statistical Model for Identifying Proteins by Tandem Mass Spectrometry” *Anal. Chem.* 75, 4646-4658 (2003). + + + + + diff -r 000000000000 -r adf8ac7aa6c8 protein_prophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet_wrapper.rb Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,35 @@ +require 'pathname' + +$VERBOSE=nil + +actual_output_path_string=ARGV.shift + +# Second argument is the original input file name ... 
we'll change this below +original_input_file=ARGV[0] + +# Before doing anything we create a link to the input file in our working dir with ".pep.xml" appended to the input +# name because peptide prophet can't handle anything else + +wd= Dir.pwd + +original_input_path=Pathname.new("#{original_input_file}") +actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" + +cmd = "ln -s #{original_input_file} #{actual_input_path_string};" + +cmd << "rvm 1.9.3@protk-1.2.0 do protein_prophet.rb" + + +ARGV[0]="#{actual_input_path_string}" + +ARGV.each { |a| + + cmd << " #{a}" +} + +cmd << " -o protein_prophet_results.prot.xml" + +cmd << ";ruby -pi -e \"gsub('#{actual_input_path_string}', '#{original_input_file}.pep.xml')\" protein_prophet_results.prot.xml" + +%x[#{cmd}] + diff -r 000000000000 -r adf8ac7aa6c8 repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,6 @@ + + + + + + diff -r 000000000000 -r adf8ac7aa6c8 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jun 06 01:33:00 2013 -0400 @@ -0,0 +1,11 @@ + + + + + + + + + + +