Repository 'mqppep_anova'
hg clone https://eddie.galaxyproject.org/repos/eschen42/mqppep_anova

Changeset 6:922d309640db (2022-03-11)
Previous changeset 5:d4d531006735 (2022-03-10) Next changeset 7:d728198f1ba5 (2022-03-15)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
modified:
MaxQuantProcessingScript.R
PhosphoPeptide_Upstream_Kinase_Mapping.pl
mqppep_mrgfltr.py
repository_dependencies.xml
search_ppep.py
b
diff -r d4d531006735 -r 922d309640db MaxQuantProcessingScript.R
--- a/MaxQuantProcessingScript.R Thu Mar 10 23:42:48 2022 +0000
+++ b/MaxQuantProcessingScript.R Fri Mar 11 20:04:05 2022 +0000
[
@@ -521,6 +521,14 @@
 # ---
 quant_write <- cbind(metadata_df[, "Sequence window"], quant_data)
 colnames(quant_write)[1] <- "Sequence.Window"
+write.table(
+  quant_write,
+  file = quant_file_name,
+  sep = "\t",
+  quote = FALSE,
+  col.names = TRUE,
+  row.names = FALSE
+)
 # ...
 
 
@@ -568,14 +576,6 @@
 pct_multiphos <- sprintf("%0.1f%s", 100 * pct_multiphos, "%")
 # ...
 
-write.table(
-  quant_data_qc_collapsed,
-  file = quant_file_name,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
-)
 
 # Compute and visualize breakdown of pY, pS, and pT before enrichment filter
 # ---
b
diff -r d4d531006735 -r 922d309640db PhosphoPeptide_Upstream_Kinase_Mapping.pl
--- a/PhosphoPeptide_Upstream_Kinase_Mapping.pl Thu Mar 10 23:42:48 2022 +0000
+++ b/PhosphoPeptide_Upstream_Kinase_Mapping.pl Fri Mar 11 20:04:05 2022 +0000
[
b'@@ -25,7 +25,8 @@\n ###############################################################################################################################\r\n \r\n use strict;\r\n-use warnings;\r\n+#ACE use warnings;\r\n+use warnings \'FATAL\' => \'all\';\r\n \r\n use Getopt::Std;\r\n use DBD::SQLite::Constants qw/:file_open/;\r\n@@ -37,6 +38,9 @@\n #use Data::Dump qw(dump);\r\n \r\n my $USE_SEARCH_PPEP_PY = 1;\r\n+#my $FAILED_MATCH_SEQ = "Failed match";\r\n+my $FAILED_MATCH_SEQ = \'No Sequence\';\r\n+my $FAILED_MATCH_GENE_NAME = \'No_Gene_Name\';\r\n \r\n my $dirname = dirname(__FILE__);\r\n my %opts;\r\n@@ -45,7 +49,7 @@\n my ($fasta_in, $networkin_in, $motifs_in, $PSP_Kinase_Substrate_in, $PSP_Regulatory_Sites_in);\r\n my (@samples, %sample_id_lut, %ppep_id_lut, %data, @tmp_data, %n);\r\n my $line = 0;\r\n-my @failed_match = ("Failed match");\r\n+my @failed_match = ($FAILED_MATCH_SEQ);\r\n my @failed_matches;\r\n my (%all_data);\r\n my (@p_peptides, @non_p_peptides);\r\n@@ -571,6 +575,37 @@\n \r\n print "$#accessions accessions were read from the UniProtKB/Swiss-Prot $dbtype file\\n";\r\n \r\n+######################\r\n+  $dbh = DBI->connect("dbi:SQLite:$dbfile", undef, undef);\r\n+  $stmth = $dbh->prepare("\r\n+  INSERT INTO UniProtKB (\r\n+    Uniprot_ID,\r\n+    Description,\r\n+    Organism_Name,\r\n+    Organism_ID,\r\n+    Gene_Name,\r\n+    PE,\r\n+    SV,\r\n+    Sequence,\r\n+    Database\r\n+  ) VALUES (\r\n+    \'No Uniprot_ID\',\r\n+    \'NO_GENE_SYMBOL No Description\',\r\n+    \'No Organism_Name\',\r\n+    0,\r\n+    \'$FAILED_MATCH_GENE_NAME\',\r\n+    \'0\',\r\n+    \'0\',\r\n+    \'$FAILED_MATCH_SEQ\',\r\n+    \'No Database\'\r\n+  )\r\n+  ");\r\n+  if (not $stmth->execute()) {\r\n+      print "Error inserting dummy row into UniProtKB: $stmth->errstr\\n";\r\n+  }\r\n+  $dbh->disconnect if ( defined $dbh );\r\n+######################\r\n+\r\n @timeData = localtime(time);\r\n print "\\n--- Start search at " . format_localtime_iso8601() ."\\n";\r\n \r\n@@ -579,6 +614,7 @@\n   $i = system("\\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose");\r\n } else {\r\n   $i = system("\\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in");\r\n+  #ACE DELETEME $i = system("\\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose");\r\n }\r\n if ($i) {\r\n   print "python $dirname/search_ppep.py -u $db_out -p $file_in\\n  exited with exit code $i\\n";\r\n@@ -628,7 +664,7 @@\n \r\n my %ppep_to_count_lut;\r\n print "start select peptide counts " . format_localtime_iso8601() . "\\n";\r\n-$stmth = $dbh->prepare("\r\n+my $uniprotkb_pep_ppep_view_stmth = $dbh->prepare("\r\n     SELECT DISTINCT\r\n       phosphopeptide\r\n     , count(*) as i\r\n@@ -639,10 +675,10 @@\n     ORDER BY\r\n       phosphopeptide\r\n ");\r\n-if (not $stmth->execute()) {\r\n-    die "Error fetching peptide counts: $stmth->errstr\\n";\r\n+if (not $uniprotkb_pep_ppep_view_stmth->execute()) {\r\n+    die "Error fetching peptide counts: $uniprotkb_pep_ppep_view_stmth->errstr\\n";\r\n }\r\n-while (my @row = $stmth->fetchrow_array) {\r\n+while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) {\r\n   $ppep_to_count_lut{$row[0]} = $row[1];\r\n   #print "\\$ppep_to_count_lut{$row[0]} = $ppep_to_count_lut{$row[0]}\\n";\r\n }\r\n@@ -662,7 +698,7 @@\n \r\n my %ppep_to_row_lut;\r\n print "start select all records without qualification " . format_localtime_iso8601() . "\\n";\r\n-$stmth = $dbh->prepare("\r\n+$uniprotkb_pep_ppep_view_stmth = $dbh->prepare("\r\n     SELECT DISTINCT\r\n       accession\r\n     , peptide\r\n@@ -679,8 +715,8 @@\n     ORDER BY\r\n       phosphopeptide\r\n ");\r\n-if (not $stmth->execute()) {\r\n-    die "Error fetching all records without qualification: $stmth->errstr\\n";\r\n+if (not $uniprotkb_pep_ppep_view_stmth->execute()) {\r\n+    die "Error fetching all records without qualification: $uniprotkb_pep_ppep_view_stmth->errstr\\n";\r\n }\r\n my $current_ppep;\r\n my $counter = 0;\r\n@@ -689,7 +725,7 @@\n @tmp_accessions = ();\r\n @tmp_names = ();\r\n @tmp_sites = ();\r\n-while (my @row = $stmth->fetchrow_array) {\r\n+while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) {\r\n     # Identify phosphopeptide for current ro'..b'                    my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1;        # added 12.05.2012 for Justin\'s data\r\n-                            print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";\r\n-                            $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";\r\n-                        }\r\n-                        elsif ($j == $#tmp_p_residues) {\r\n-                            my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1;        # added 12.05.2012 for Justin\'s data\r\n-                            print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\\t";\r\n-                            $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing";\r\n+                    if ($#tmp_p_residues > 0) {\r\n+                        for my $j (0 .. $#tmp_p_residues) {\r\n+                            if ($j < $#tmp_p_residues) {\r\n+                                if (defined $p_residues{$peptide}{$i}[$j]) {\r\n+                                    my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1;        # added 12.05.2012 for Justin\'s data\r\n+                                    $foobar = $residues{$peptide}{$i}[$j];\r\n+                                    if (defined $foobar) {\r\n+                                        print OUT "$foobar";\r\n+                                        print OUT "$tmp_site_for_printing, ";\r\n+                                        $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";\r\n+                                        $there_were_sites = 1;\r\n+                                    }\r\n+                                }\r\n+                            }\r\n+                            elsif ($j == $#tmp_p_residues) {\r\n+                                if (defined $p_residues{$peptide}{$i}[$j]) {\r\n+                                    $foobar = $residues{$peptide}{$i}[$j];\r\n+                                    if (defined $foobar) {\r\n+                                        my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1;        # added 12.05.2012 for Justin\'s data\r\n+                                        print OUT "$foobar";\r\n+                                        print OUT "$tmp_site_for_printing\\t";\r\n+                                        #ACE print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\\t";\r\n+                                        $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing";\r\n+                                        $there_were_sites = 1;\r\n+                                    }\r\n+                                }\r\n+                            }\r\n                         }\r\n                     }\r\n-                } else {\r\n+                }\r\n+                if (0 == $there_were_sites) {\r\n                   print OUT "\\t";\r\n                 }\r\n             }\r\n         }\r\n+        print "tmp_for_insert \'$tmp_for_insert\' for \'$peptide\'\\n" if $verbose_cond;\r\n         push (@ppep_metadata, $tmp_for_insert);\r\n \r\n-        # Print the UNIQUE phospho-motifs\r\n-        # Column 6:\r\n+        # Column 6: UNIQUE phospho-motifs\r\n         print OUT join(" /// ", @{$unique_motifs{$peptide}}), "\\t";\r\n         push (@ppep_metadata, join(" /// ", @{$unique_motifs{$peptide}}));\r\n \r\n-        # Print the accessions\r\n-        # Column 7:\r\n+        # Column 7: accessions\r\n         if (defined $accessions{$peptide}) {\r\n             print OUT join(" /// ", @{$accessions{$peptide}}), "\\t";\r\n             push (@ppep_metadata, join(" /// ", @{$accessions{$peptide}}));\r\n@@ -1967,8 +2019,7 @@\n             push (@ppep_metadata, "");\r\n         }\r\n \r\n-        # print ALL motifs with residue numbers\r\n-        # Column 8:\r\n+        # Column 8: ALL motifs with residue numbers\r\n         if (defined $p_motifs{$peptide}) {\r\n             print OUT join(" /// ", @{$p_motifs{$peptide}}), "\\t";\r\n             push (@ppep_metadata, join(" /// ", @{$p_motifs{$peptide}}));\r\n'
b
diff -r d4d531006735 -r 922d309640db mqppep_mrgfltr.py
--- a/mqppep_mrgfltr.py Thu Mar 10 23:42:48 2022 +0000
+++ b/mqppep_mrgfltr.py Fri Mar 11 20:04:05 2022 +0000
[
@@ -44,6 +44,19 @@
         return None
 
 
+def whine(func, *args, handle=lambda e: e, **kwargs):
+
+    try:
+        return func(*args, **kwargs)
+    except Exception as e:
+        print("Warning: For %s" % str(func))
+        print("  with args %s" % str(args))
+        print("  caught exception: %s" % str(e))
+        (ty, va, tb) = sys.exc_info()
+        print("  stack trace: " + str(traceback.format_exception(ty, va, tb)))
+        return None
+
+
 def ppep_join(x):
     x = [i for i in x if N_A != i]
     result = "%s" % " | ".join(x)
@@ -683,16 +696,16 @@
             #             message = pe.message))
             #             )
             #         )
+            if phospho_pep not in PhosphoPep_UniProtSeq_LUT:
+                raise PreconditionError(
+                    phospho_pep,
+                    "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT",
+                )
             if dephospho_pep not in DephosphoPep_UniProtSeq_LUT:
                 raise PreconditionError(
                     dephospho_pep,
                     "dephosphorylated phosphopeptide not found in DephosphoPep_UniProtSeq_LUT",
                 )
-            if phospho_pep not in PhosphoPep_UniProtSeq_LUT:
-                raise PreconditionError(
-                    dephospho_pep,
-                    "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT",
-                )
             if (
                 dephospho_pep
                 != PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)]
@@ -739,6 +752,10 @@
                 seq10s = []
                 while i < len(ploc):
                     start = UniProtSeq.find(dephospho_pep)
+                    # handle case where no sequence was found for dep-pep
+                    if start < 0:
+                        i += 1
+                        continue
                     psite = (
                         start + ploc[i]
                     )  # location of phosphoresidue on protein sequence
@@ -869,7 +886,6 @@
                             # val is a string
                             if val not in joined_set:
                                 joined_set.add(val)
-                                # joined_value += sep + '; '.join(map(str, val))
                                 joined_value += sep + val
                                 sep = "; "
                     # joined_value is a string
@@ -914,9 +930,10 @@
             return [phospho_pep, result]
 
         # Construct list of [string, dictionary] lists
-        #   where the dictionary provides the SwissProt metadata for a phosphopeptide
+        #   where the dictionary provides the SwissProt metadata
+        #   for a phosphopeptide
         result_list = [
-            catch(pseq_to_subdict, psequence)
+            whine(pseq_to_subdict, psequence)
             for psequence in data_in[PHOSPHOPEPTIDE_MATCH]
         ]
 
@@ -1021,7 +1038,8 @@
             how="left",
         )
 
-        # after merging df, select only the columns of interest - note that PROTEIN is absent here
+        # after merging df, select only the columns of interest;
+        #   note that PROTEIN is absent here
         merge_df = merge_df[
             [
                 PHOSPHOPEPTIDE,
@@ -1033,7 +1051,8 @@
                 ON_NOTES,
             ]
         ]
-        # combine column values of interest into one FUNCTION_PHOSPHORESIDUE column"
+        # combine column values of interest
+        #   into one FUNCTION_PHOSPHORESIDUE column"
         merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ON_FUNCTION].str.cat(
             merge_df[ON_PROCESS], sep="; ", na_rep=""
         )
b
diff -r d4d531006735 -r 922d309640db repository_dependencies.xml
--- a/repository_dependencies.xml Thu Mar 10 23:42:48 2022 +0000
+++ b/repository_dependencies.xml Fri Mar 11 20:04:05 2022 +0000
b
@@ -1,5 +1,5 @@
 <?xml version="1.0" ?>
 <repositories description="Suite for preprocessing and ANOVA of MaxQuant results using LC-MS proteomics data from phosphoproteomic enrichment.">
-    <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="8be24b489992"/>
-    <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="cfc65ae227f8"/>
+    <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="b91809a18dbe"/>
+    <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="d4d531006735"/>
 </repositories>
\ No newline at end of file
b
diff -r d4d531006735 -r 922d309640db search_ppep.py
--- a/search_ppep.py Thu Mar 10 23:42:48 2022 +0000
+++ b/search_ppep.py Fri Mar 11 20:04:05 2022 +0000
[
@@ -516,6 +516,16 @@
     for row in ker.fetchall():
         print(row[0])
 
+    # link peptides not found in sequence database to a dummy sequence-record
+    ker.execute(
+        """
+        INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end)
+          SELECT id, 'No Uniprot_ID', 0, 0
+          FROM   deppep
+          WHERE  id NOT IN (SELECT deppep_id FROM deppep_UniProtKB)
+        """
+    )
+
     con.commit()
     ker.execute("vacuum")
     con.close()