Previous changeset 5:d4d531006735 (2022-03-10) Next changeset 7:d728198f1ba5 (2022-03-15) |
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b" |
modified:
MaxQuantProcessingScript.R PhosphoPeptide_Upstream_Kinase_Mapping.pl mqppep_mrgfltr.py repository_dependencies.xml search_ppep.py |
b |
diff -r d4d531006735 -r 922d309640db MaxQuantProcessingScript.R --- a/MaxQuantProcessingScript.R Thu Mar 10 23:42:48 2022 +0000 +++ b/MaxQuantProcessingScript.R Fri Mar 11 20:04:05 2022 +0000 |
[ |
@@ -521,6 +521,14 @@ # --- quant_write <- cbind(metadata_df[, "Sequence window"], quant_data) colnames(quant_write)[1] <- "Sequence.Window" +write.table( + quant_write, + file = quant_file_name, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE +) # ... @@ -568,14 +576,6 @@ pct_multiphos <- sprintf("%0.1f%s", 100 * pct_multiphos, "%") # ... -write.table( - quant_data_qc_collapsed, - file = quant_file_name, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE -) # Compute and visualize breakdown of pY, pS, and pT before enrichment filter # --- |
b |
diff -r d4d531006735 -r 922d309640db PhosphoPeptide_Upstream_Kinase_Mapping.pl --- a/PhosphoPeptide_Upstream_Kinase_Mapping.pl Thu Mar 10 23:42:48 2022 +0000 +++ b/PhosphoPeptide_Upstream_Kinase_Mapping.pl Fri Mar 11 20:04:05 2022 +0000 |
[ |
b'@@ -25,7 +25,8 @@\n ###############################################################################################################################\r\n \r\n use strict;\r\n-use warnings;\r\n+#ACE use warnings;\r\n+use warnings \'FATAL\' => \'all\';\r\n \r\n use Getopt::Std;\r\n use DBD::SQLite::Constants qw/:file_open/;\r\n@@ -37,6 +38,9 @@\n #use Data::Dump qw(dump);\r\n \r\n my $USE_SEARCH_PPEP_PY = 1;\r\n+#my $FAILED_MATCH_SEQ = "Failed match";\r\n+my $FAILED_MATCH_SEQ = \'No Sequence\';\r\n+my $FAILED_MATCH_GENE_NAME = \'No_Gene_Name\';\r\n \r\n my $dirname = dirname(__FILE__);\r\n my %opts;\r\n@@ -45,7 +49,7 @@\n my ($fasta_in, $networkin_in, $motifs_in, $PSP_Kinase_Substrate_in, $PSP_Regulatory_Sites_in);\r\n my (@samples, %sample_id_lut, %ppep_id_lut, %data, @tmp_data, %n);\r\n my $line = 0;\r\n-my @failed_match = ("Failed match");\r\n+my @failed_match = ($FAILED_MATCH_SEQ);\r\n my @failed_matches;\r\n my (%all_data);\r\n my (@p_peptides, @non_p_peptides);\r\n@@ -571,6 +575,37 @@\n \r\n print "$#accessions accessions were read from the UniProtKB/Swiss-Prot $dbtype file\\n";\r\n \r\n+######################\r\n+ $dbh = DBI->connect("dbi:SQLite:$dbfile", undef, undef);\r\n+ $stmth = $dbh->prepare("\r\n+ INSERT INTO UniProtKB (\r\n+ Uniprot_ID,\r\n+ Description,\r\n+ Organism_Name,\r\n+ Organism_ID,\r\n+ Gene_Name,\r\n+ PE,\r\n+ SV,\r\n+ Sequence,\r\n+ Database\r\n+ ) VALUES (\r\n+ \'No Uniprot_ID\',\r\n+ \'NO_GENE_SYMBOL No Description\',\r\n+ \'No Organism_Name\',\r\n+ 0,\r\n+ \'$FAILED_MATCH_GENE_NAME\',\r\n+ \'0\',\r\n+ \'0\',\r\n+ \'$FAILED_MATCH_SEQ\',\r\n+ \'No Database\'\r\n+ )\r\n+ ");\r\n+ if (not $stmth->execute()) {\r\n+ print "Error inserting dummy row into UniProtKB: $stmth->errstr\\n";\r\n+ }\r\n+ $dbh->disconnect if ( defined $dbh );\r\n+######################\r\n+\r\n @timeData = localtime(time);\r\n print "\\n--- Start search at " . format_localtime_iso8601() ."\\n";\r\n \r\n@@ -579,6 +614,7 @@\n $i = system("\\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose");\r\n } else {\r\n $i = system("\\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in");\r\n+ #ACE DELETEME $i = system("\\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose");\r\n }\r\n if ($i) {\r\n print "python $dirname/search_ppep.py -u $db_out -p $file_in\\n exited with exit code $i\\n";\r\n@@ -628,7 +664,7 @@\n \r\n my %ppep_to_count_lut;\r\n print "start select peptide counts " . format_localtime_iso8601() . "\\n";\r\n-$stmth = $dbh->prepare("\r\n+my $uniprotkb_pep_ppep_view_stmth = $dbh->prepare("\r\n SELECT DISTINCT\r\n phosphopeptide\r\n , count(*) as i\r\n@@ -639,10 +675,10 @@\n ORDER BY\r\n phosphopeptide\r\n ");\r\n-if (not $stmth->execute()) {\r\n- die "Error fetching peptide counts: $stmth->errstr\\n";\r\n+if (not $uniprotkb_pep_ppep_view_stmth->execute()) {\r\n+ die "Error fetching peptide counts: $uniprotkb_pep_ppep_view_stmth->errstr\\n";\r\n }\r\n-while (my @row = $stmth->fetchrow_array) {\r\n+while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) {\r\n $ppep_to_count_lut{$row[0]} = $row[1];\r\n #print "\\$ppep_to_count_lut{$row[0]} = $ppep_to_count_lut{$row[0]}\\n";\r\n }\r\n@@ -662,7 +698,7 @@\n \r\n my %ppep_to_row_lut;\r\n print "start select all records without qualification " . format_localtime_iso8601() . "\\n";\r\n-$stmth = $dbh->prepare("\r\n+$uniprotkb_pep_ppep_view_stmth = $dbh->prepare("\r\n SELECT DISTINCT\r\n accession\r\n , peptide\r\n@@ -679,8 +715,8 @@\n ORDER BY\r\n phosphopeptide\r\n ");\r\n-if (not $stmth->execute()) {\r\n- die "Error fetching all records without qualification: $stmth->errstr\\n";\r\n+if (not $uniprotkb_pep_ppep_view_stmth->execute()) {\r\n+ die "Error fetching all records without qualification: $uniprotkb_pep_ppep_view_stmth->errstr\\n";\r\n }\r\n my $current_ppep;\r\n my $counter = 0;\r\n@@ -689,7 +725,7 @@\n @tmp_accessions = ();\r\n @tmp_names = ();\r\n @tmp_sites = ();\r\n-while (my @row = $stmth->fetchrow_array) {\r\n+while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) {\r\n # Identify phosphopeptide for current ro'..b' my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin\'s data\r\n- print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";\r\n- $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";\r\n- }\r\n- elsif ($j == $#tmp_p_residues) {\r\n- my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin\'s data\r\n- print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\\t";\r\n- $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing";\r\n+ if ($#tmp_p_residues > 0) {\r\n+ for my $j (0 .. $#tmp_p_residues) {\r\n+ if ($j < $#tmp_p_residues) {\r\n+ if (defined $p_residues{$peptide}{$i}[$j]) {\r\n+ my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin\'s data\r\n+ $foobar = $residues{$peptide}{$i}[$j];\r\n+ if (defined $foobar) {\r\n+ print OUT "$foobar";\r\n+ print OUT "$tmp_site_for_printing, ";\r\n+ $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";\r\n+ $there_were_sites = 1;\r\n+ }\r\n+ }\r\n+ }\r\n+ elsif ($j == $#tmp_p_residues) {\r\n+ if (defined $p_residues{$peptide}{$i}[$j]) {\r\n+ $foobar = $residues{$peptide}{$i}[$j];\r\n+ if (defined $foobar) {\r\n+ my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin\'s data\r\n+ print OUT "$foobar";\r\n+ print OUT "$tmp_site_for_printing\\t";\r\n+ #ACE print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\\t";\r\n+ $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing";\r\n+ $there_were_sites = 1;\r\n+ }\r\n+ }\r\n+ }\r\n }\r\n }\r\n- } else {\r\n+ }\r\n+ if (0 == $there_were_sites) {\r\n print OUT "\\t";\r\n }\r\n }\r\n }\r\n+ print "tmp_for_insert \'$tmp_for_insert\' for \'$peptide\'\\n" if $verbose_cond;\r\n push (@ppep_metadata, $tmp_for_insert);\r\n \r\n- # Print the UNIQUE phospho-motifs\r\n- # Column 6:\r\n+ # Column 6: UNIQUE phospho-motifs\r\n print OUT join(" /// ", @{$unique_motifs{$peptide}}), "\\t";\r\n push (@ppep_metadata, join(" /// ", @{$unique_motifs{$peptide}}));\r\n \r\n- # Print the accessions\r\n- # Column 7:\r\n+ # Column 7: accessions\r\n if (defined $accessions{$peptide}) {\r\n print OUT join(" /// ", @{$accessions{$peptide}}), "\\t";\r\n push (@ppep_metadata, join(" /// ", @{$accessions{$peptide}}));\r\n@@ -1967,8 +2019,7 @@\n push (@ppep_metadata, "");\r\n }\r\n \r\n- # print ALL motifs with residue numbers\r\n- # Column 8:\r\n+ # Column 8: ALL motifs with residue numbers\r\n if (defined $p_motifs{$peptide}) {\r\n print OUT join(" /// ", @{$p_motifs{$peptide}}), "\\t";\r\n push (@ppep_metadata, join(" /// ", @{$p_motifs{$peptide}}));\r\n' |
b |
diff -r d4d531006735 -r 922d309640db mqppep_mrgfltr.py --- a/mqppep_mrgfltr.py Thu Mar 10 23:42:48 2022 +0000 +++ b/mqppep_mrgfltr.py Fri Mar 11 20:04:05 2022 +0000 |
[ |
@@ -44,6 +44,19 @@ return None +def whine(func, *args, handle=lambda e: e, **kwargs): + + try: + return func(*args, **kwargs) + except Exception as e: + print("Warning: For %s" % str(func)) + print(" with args %s" % str(args)) + print(" caught exception: %s" % str(e)) + (ty, va, tb) = sys.exc_info() + print(" stack trace: " + str(traceback.format_exception(ty, va, tb))) + return None + + def ppep_join(x): x = [i for i in x if N_A != i] result = "%s" % " | ".join(x) @@ -683,16 +696,16 @@ # message = pe.message)) # ) # ) + if phospho_pep not in PhosphoPep_UniProtSeq_LUT: + raise PreconditionError( + phospho_pep, + "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT", + ) if dephospho_pep not in DephosphoPep_UniProtSeq_LUT: raise PreconditionError( dephospho_pep, "dephosphorylated phosphopeptide not found in DephosphoPep_UniProtSeq_LUT", ) - if phospho_pep not in PhosphoPep_UniProtSeq_LUT: - raise PreconditionError( - dephospho_pep, - "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT", - ) if ( dephospho_pep != PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)] @@ -739,6 +752,10 @@ seq10s = [] while i < len(ploc): start = UniProtSeq.find(dephospho_pep) + # handle case where no sequence was found for dep-pep + if start < 0: + i += 1 + continue psite = ( start + ploc[i] ) # location of phosphoresidue on protein sequence @@ -869,7 +886,6 @@ # val is a string if val not in joined_set: joined_set.add(val) - # joined_value += sep + '; '.join(map(str, val)) joined_value += sep + val sep = "; " # joined_value is a string @@ -914,9 +930,10 @@ return [phospho_pep, result] # Construct list of [string, dictionary] lists - # where the dictionary provides the SwissProt metadata for a phosphopeptide + # where the dictionary provides the SwissProt metadata + # for a phosphopeptide result_list = [ - catch(pseq_to_subdict, psequence) + whine(pseq_to_subdict, psequence) for psequence in data_in[PHOSPHOPEPTIDE_MATCH] ] @@ -1021,7 +1038,8 @@ how="left", ) - # after merging df, select only the columns of interest - note that PROTEIN is absent here + # after merging df, select only the columns of interest; + # note that PROTEIN is absent here merge_df = merge_df[ [ PHOSPHOPEPTIDE, @@ -1033,7 +1051,8 @@ ON_NOTES, ] ] - # combine column values of interest into one FUNCTION_PHOSPHORESIDUE column" + # combine column values of interest + # into one FUNCTION_PHOSPHORESIDUE column" merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ON_FUNCTION].str.cat( merge_df[ON_PROCESS], sep="; ", na_rep="" ) |
b |
diff -r d4d531006735 -r 922d309640db repository_dependencies.xml --- a/repository_dependencies.xml Thu Mar 10 23:42:48 2022 +0000 +++ b/repository_dependencies.xml Fri Mar 11 20:04:05 2022 +0000 |
b |
@@ -1,5 +1,5 @@ <?xml version="1.0" ?> <repositories description="Suite for preprocessing and ANOVA of MaxQuant results using LC-MS proteomics data from phosphoproteomic enrichment."> - <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="8be24b489992"/> - <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="cfc65ae227f8"/> + <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="b91809a18dbe"/> + <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="d4d531006735"/> </repositories> \ No newline at end of file |
b |
diff -r d4d531006735 -r 922d309640db search_ppep.py --- a/search_ppep.py Thu Mar 10 23:42:48 2022 +0000 +++ b/search_ppep.py Fri Mar 11 20:04:05 2022 +0000 |
[ |
@@ -516,6 +516,16 @@ for row in ker.fetchall(): print(row[0]) + # link peptides not found in sequence database to a dummy sequence-record + ker.execute( + """ + INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end) + SELECT id, 'No Uniprot_ID', 0, 0 + FROM deppep + WHERE id NOT IN (SELECT deppep_id FROM deppep_UniProtKB) + """ + ) + con.commit() ker.execute("vacuum") con.close() |