Mercurial > repos > eschen42 > mqppep_anova
comparison PhosphoPeptide_Upstream_Kinase_Mapping.pl @ 6:922d309640db draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
field | value |
---|---|
author | eschen42 |
date | Fri, 11 Mar 2022 20:04:05 +0000 |
parents | c1403d18c189 |
children | d728198f1ba5 |
comparison legend: equal | deleted | inserted | replaced
5:d4d531006735 | 6:922d309640db |
---|---|
23 # | 23 # |
24 # | 24 # |
25 ############################################################################################################################### | 25 ############################################################################################################################### |
26 | 26 |
27 use strict; | 27 use strict; |
28 use warnings; | 28 #ACE use warnings; |
29 use warnings 'FATAL' => 'all'; | |
29 | 30 |
30 use Getopt::Std; | 31 use Getopt::Std; |
31 use DBD::SQLite::Constants qw/:file_open/; | 32 use DBD::SQLite::Constants qw/:file_open/; |
32 use DBI qw(:sql_types); | 33 use DBI qw(:sql_types); |
33 use File::Copy; | 34 use File::Copy; |
35 use POSIX qw(strftime); | 36 use POSIX qw(strftime); |
36 use Time::HiRes qw(gettimeofday); | 37 use Time::HiRes qw(gettimeofday); |
37 #use Data::Dump qw(dump); | 38 #use Data::Dump qw(dump); |
38 | 39 |
39 my $USE_SEARCH_PPEP_PY = 1; | 40 my $USE_SEARCH_PPEP_PY = 1; |
41 #my $FAILED_MATCH_SEQ = "Failed match"; | |
42 my $FAILED_MATCH_SEQ = 'No Sequence'; | |
43 my $FAILED_MATCH_GENE_NAME = 'No_Gene_Name'; | |
40 | 44 |
41 my $dirname = dirname(__FILE__); | 45 my $dirname = dirname(__FILE__); |
42 my %opts; | 46 my %opts; |
43 my ($file_in, $average_or_sum, $db_out, $file_out, $file_melt, $phospho_type); | 47 my ($file_in, $average_or_sum, $db_out, $file_out, $file_melt, $phospho_type); |
44 my $dbtype; | 48 my $dbtype; |
45 my ($fasta_in, $networkin_in, $motifs_in, $PSP_Kinase_Substrate_in, $PSP_Regulatory_Sites_in); | 49 my ($fasta_in, $networkin_in, $motifs_in, $PSP_Kinase_Substrate_in, $PSP_Regulatory_Sites_in); |
46 my (@samples, %sample_id_lut, %ppep_id_lut, %data, @tmp_data, %n); | 50 my (@samples, %sample_id_lut, %ppep_id_lut, %data, @tmp_data, %n); |
47 my $line = 0; | 51 my $line = 0; |
48 my @failed_match = ("Failed match"); | 52 my @failed_match = ($FAILED_MATCH_SEQ); |
49 my @failed_matches; | 53 my @failed_matches; |
50 my (%all_data); | 54 my (%all_data); |
51 my (@p_peptides, @non_p_peptides); | 55 my (@p_peptides, @non_p_peptides); |
52 my @parsed_fasta; | 56 my @parsed_fasta; |
53 my (@accessions, @names, @sequences, @databases, $database); | 57 my (@accessions, @names, @sequences, @databases, $database); |
569 $dbtype = "SQLite"; | 573 $dbtype = "SQLite"; |
570 } | 574 } |
571 | 575 |
572 print "$#accessions accessions were read from the UniProtKB/Swiss-Prot $dbtype file\n"; | 576 print "$#accessions accessions were read from the UniProtKB/Swiss-Prot $dbtype file\n"; |
573 | 577 |
578 ###################### | |
579 $dbh = DBI->connect("dbi:SQLite:$dbfile", undef, undef); | |
580 $stmth = $dbh->prepare(" | |
581 INSERT INTO UniProtKB ( | |
582 Uniprot_ID, | |
583 Description, | |
584 Organism_Name, | |
585 Organism_ID, | |
586 Gene_Name, | |
587 PE, | |
588 SV, | |
589 Sequence, | |
590 Database | |
591 ) VALUES ( | |
592 'No Uniprot_ID', | |
593 'NO_GENE_SYMBOL No Description', | |
594 'No Organism_Name', | |
595 0, | |
596 '$FAILED_MATCH_GENE_NAME', | |
597 '0', | |
598 '0', | |
599 '$FAILED_MATCH_SEQ', | |
600 'No Database' | |
601 ) | |
602 "); | |
603 if (not $stmth->execute()) { | |
604 print "Error inserting dummy row into UniProtKB: $stmth->errstr\n"; | |
605 } | |
606 $dbh->disconnect if ( defined $dbh ); | |
607 ###################### | |
608 | |
574 @timeData = localtime(time); | 609 @timeData = localtime(time); |
575 print "\n--- Start search at " . format_localtime_iso8601() ."\n"; | 610 print "\n--- Start search at " . format_localtime_iso8601() ."\n"; |
576 | 611 |
577 print " --> Calling 'search_ppep' script\n\n"; | 612 print " --> Calling 'search_ppep' script\n\n"; |
578 if ($verbose) { | 613 if ($verbose) { |
579 $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose"); | 614 $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose"); |
580 } else { | 615 } else { |
581 $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in"); | 616 $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in"); |
617 #ACE DELETEME $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose"); | |
582 } | 618 } |
583 if ($i) { | 619 if ($i) { |
584 print "python $dirname/search_ppep.py -u $db_out -p $file_in\n exited with exit code $i\n"; | 620 print "python $dirname/search_ppep.py -u $db_out -p $file_in\n exited with exit code $i\n"; |
585 die "Search failed for phosphopeptides in SwissProt/SQLite file."; | 621 die "Search failed for phosphopeptides in SwissProt/SQLite file."; |
586 } | 622 } |
626 # AND deppep_UniProtKB.UniprotKB_ID = UniProtKB.Uniprot_ID | 662 # AND deppep_UniProtKB.UniprotKB_ID = UniProtKB.Uniprot_ID |
627 # ORDER BY UniprotKB_ID, deppep.seq, ppep.seq; | 663 # ORDER BY UniprotKB_ID, deppep.seq, ppep.seq; |
628 | 664 |
629 my %ppep_to_count_lut; | 665 my %ppep_to_count_lut; |
630 print "start select peptide counts " . format_localtime_iso8601() . "\n"; | 666 print "start select peptide counts " . format_localtime_iso8601() . "\n"; |
631 $stmth = $dbh->prepare(" | 667 my $uniprotkb_pep_ppep_view_stmth = $dbh->prepare(" |
632 SELECT DISTINCT | 668 SELECT DISTINCT |
633 phosphopeptide | 669 phosphopeptide |
634 , count(*) as i | 670 , count(*) as i |
635 FROM | 671 FROM |
636 uniprotkb_pep_ppep_view | 672 uniprotkb_pep_ppep_view |
637 GROUP BY | 673 GROUP BY |
638 phosphopeptide | 674 phosphopeptide |
639 ORDER BY | 675 ORDER BY |
640 phosphopeptide | 676 phosphopeptide |
641 "); | 677 "); |
642 if (not $stmth->execute()) { | 678 if (not $uniprotkb_pep_ppep_view_stmth->execute()) { |
643 die "Error fetching peptide counts: $stmth->errstr\n"; | 679 die "Error fetching peptide counts: $uniprotkb_pep_ppep_view_stmth->errstr\n"; |
644 } | 680 } |
645 while (my @row = $stmth->fetchrow_array) { | 681 while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) { |
646 $ppep_to_count_lut{$row[0]} = $row[1]; | 682 $ppep_to_count_lut{$row[0]} = $row[1]; |
647 #print "\$ppep_to_count_lut{$row[0]} = $ppep_to_count_lut{$row[0]}\n"; | 683 #print "\$ppep_to_count_lut{$row[0]} = $ppep_to_count_lut{$row[0]}\n"; |
648 } | 684 } |
649 | 685 |
650 # accession, peptide, sequence, description, phosphopeptide, long_description, pos_start, pos_end, scrubbed, ppep_id | 686 # accession, peptide, sequence, description, phosphopeptide, long_description, pos_start, pos_end, scrubbed, ppep_id |
660 my $COL_SCRUBBED = 8; | 696 my $COL_SCRUBBED = 8; |
661 my $COL_PPEP_ID = 9; | 697 my $COL_PPEP_ID = 9; |
662 | 698 |
663 my %ppep_to_row_lut; | 699 my %ppep_to_row_lut; |
664 print "start select all records without qualification " . format_localtime_iso8601() . "\n"; | 700 print "start select all records without qualification " . format_localtime_iso8601() . "\n"; |
665 $stmth = $dbh->prepare(" | 701 $uniprotkb_pep_ppep_view_stmth = $dbh->prepare(" |
666 SELECT DISTINCT | 702 SELECT DISTINCT |
667 accession | 703 accession |
668 , peptide | 704 , peptide |
669 , sequence | 705 , sequence |
670 , description | 706 , description |
677 FROM | 713 FROM |
678 uniprotkb_pep_ppep_view | 714 uniprotkb_pep_ppep_view |
679 ORDER BY | 715 ORDER BY |
680 phosphopeptide | 716 phosphopeptide |
681 "); | 717 "); |
682 if (not $stmth->execute()) { | 718 if (not $uniprotkb_pep_ppep_view_stmth->execute()) { |
683 die "Error fetching all records without qualification: $stmth->errstr\n"; | 719 die "Error fetching all records without qualification: $uniprotkb_pep_ppep_view_stmth->errstr\n"; |
684 } | 720 } |
685 my $current_ppep; | 721 my $current_ppep; |
686 my $counter = 0; | 722 my $counter = 0; |
687 my $former_ppep = ""; | 723 my $former_ppep = ""; |
688 @tmp_matches = (); | 724 @tmp_matches = (); |
689 @tmp_accessions = (); | 725 @tmp_accessions = (); |
690 @tmp_names = (); | 726 @tmp_names = (); |
691 @tmp_sites = (); | 727 @tmp_sites = (); |
692 while (my @row = $stmth->fetchrow_array) { | 728 while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) { |
693 # Identify phosphopeptide for current row; | 729 # Identify phosphopeptide for current row; |
694 # it is an error for it to change when the counter is not zero. | 730 # it is an error for it to change when the counter is not zero. |
695 $current_ppep = $row[$COL_PHOSPHOPEPTIDE]; | 731 $current_ppep = $row[$COL_PHOSPHOPEPTIDE]; |
696 | 732 |
697 # when counter is zero, prepare for a new phosphopeptide | 733 # when counter is zero, prepare for a new phosphopeptide |
831 | 867 |
832 my $total_length = $desired_residues_L + $tmp_p_residues[$#tmp_p_residues] - $tmp_p_residues[0] + $desired_residues_R + 1; | 868 my $total_length = $desired_residues_L + $tmp_p_residues[$#tmp_p_residues] - $tmp_p_residues[0] + $desired_residues_R + 1; |
833 my $arg2 = $tmp_p_residues[0] - $desired_residues_L; | 869 my $arg2 = $tmp_p_residues[0] - $desired_residues_L; |
834 my $arg1 = $matched_sequences{$peptide_to_match}[$i]; | 870 my $arg1 = $matched_sequences{$peptide_to_match}[$i]; |
835 | 871 |
836 if (length($arg1) > $arg2 + $total_length - 1) { | 872 if (($total_length > 0) && (length($arg1) > $arg2 + $total_length - 1)) { |
873 #ACE print "\$tmp_motif = substr($arg1, $arg2, $total_length)\n"; | |
837 $tmp_motif = substr($arg1, $arg2, $total_length); | 874 $tmp_motif = substr($arg1, $arg2, $total_length); |
838 #ACE print "tmp_motif = $tmp_motif\ti = $i\tpeptide_to_match = $peptide_to_match\tmatched_sequences{peptide_to_match}[i] = $matched_sequences{$peptide_to_match}[$i]\targ2 = $arg2\targ3 = $total_length\n"; | 875 #ACE print "tmp_motif = $tmp_motif\ti = $i\tpeptide_to_match = $peptide_to_match\tmatched_sequences{peptide_to_match}[i] = $matched_sequences{$peptide_to_match}[$i]\targ2 = $arg2\targ3 = $total_length\n"; |
839 | 876 |
840 # Put the "p" back in front of the appropriate phospho-residue(s). | 877 # Put the "p" back in front of the appropriate phospho-residue(s). |
841 my (@tmp_residues, $tmp_position); | 878 my (@tmp_residues, $tmp_position); |
1662 | 1699 |
1663 ($start_seconds, $start_microseconds) = gettimeofday; | 1700 ($start_seconds, $start_microseconds) = gettimeofday; |
1664 | 1701 |
1665 print "Writing PSP_Regulatory_site records\n"; | 1702 print "Writing PSP_Regulatory_site records\n"; |
1666 | 1703 |
1667 #ACE $stmth = $dbh->prepare(" | 1704 my $psp_regulatory_site_stmth = $dbh->prepare(" |
1668 #ACE INSERT INTO PSP_Regulatory_site ( | |
1669 #ACE DOMAIN, | |
1670 #ACE ON_FUNCTION, | |
1671 #ACE ON_PROCESS, | |
1672 #ACE ON_PROT_INTERACT, | |
1673 #ACE ON_OTHER_INTERACT, | |
1674 #ACE NOTES, | |
1675 #ACE SITE_PLUSMINUS_7AA, | |
1676 #ACE ORGANISM, | |
1677 #ACE PROTEIN | |
1678 #ACE ) VALUES (?,?,?,?,?,?,?,?,?) | |
1679 #ACE "); | |
1680 | |
1681 $stmth = $dbh->prepare(" | |
1682 INSERT INTO PSP_Regulatory_site ( | 1705 INSERT INTO PSP_Regulatory_site ( |
1683 DOMAIN, | 1706 DOMAIN, |
1684 ON_FUNCTION, | 1707 ON_FUNCTION, |
1685 ON_PROCESS, | 1708 ON_PROCESS, |
1686 ON_PROT_INTERACT, | 1709 ON_PROT_INTERACT, |
1692 "); | 1715 "); |
1693 | 1716 |
1694 foreach my $peptide (keys %data) { | 1717 foreach my $peptide (keys %data) { |
1695 if (exists($domain_2{$peptide}) and (defined $domain_2{$peptide}) and (not $domain_2{$peptide} eq "") ) { | 1718 if (exists($domain_2{$peptide}) and (defined $domain_2{$peptide}) and (not $domain_2{$peptide} eq "") ) { |
1696 #ACE print "writing domain $domain_2{$peptide} for regulatory site(s) $seq_plus7aa_2{$peptide}\n"; #ACE | 1719 #ACE print "writing domain $domain_2{$peptide} for regulatory site(s) $seq_plus7aa_2{$peptide}\n"; #ACE |
1697 $stmth->bind_param(1, $domain_2{$peptide}); | 1720 $psp_regulatory_site_stmth->bind_param(1, $domain_2{$peptide}); |
1698 $stmth->bind_param(2, $ON_FUNCTION_2{$peptide}); | 1721 $psp_regulatory_site_stmth->bind_param(2, $ON_FUNCTION_2{$peptide}); |
1699 $stmth->bind_param(3, $ON_PROCESS_2{$peptide}); | 1722 $psp_regulatory_site_stmth->bind_param(3, $ON_PROCESS_2{$peptide}); |
1700 $stmth->bind_param(4, $ON_PROT_INTERACT_2{$peptide}); | 1723 $psp_regulatory_site_stmth->bind_param(4, $ON_PROT_INTERACT_2{$peptide}); |
1701 $stmth->bind_param(5, $ON_OTHER_INTERACT_2{$peptide}); | 1724 $psp_regulatory_site_stmth->bind_param(5, $ON_OTHER_INTERACT_2{$peptide}); |
1702 $stmth->bind_param(6, $notes_2{$peptide}); | 1725 $psp_regulatory_site_stmth->bind_param(6, $notes_2{$peptide}); |
1703 $stmth->bind_param(7, $seq_plus7aa_2{$peptide}); | 1726 $psp_regulatory_site_stmth->bind_param(7, $seq_plus7aa_2{$peptide}); |
1704 $stmth->bind_param(8, $organism_2{$peptide}); | 1727 $psp_regulatory_site_stmth->bind_param(8, $organism_2{$peptide}); |
1705 #ACE $stmth->bind_param(9, $psp_regsite_protein_2{$peptide}); | 1728 if (not $psp_regulatory_site_stmth->execute()) { |
1706 if (not $stmth->execute()) { | 1729 print "Error writing PSP_Regulatory_site for one regulatory site with peptide '$domain_2{$peptide}': $psp_regulatory_site_stmth->errstr\n"; |
1707 print "Error writing PSP_Regulatory_site for one regulatory site with peptide '$domain_2{$peptide}': $stmth->errstr\n"; | |
1708 } else { | 1730 } else { |
1709 #ACE print "added domain for $domain_2{$peptide}\n"; | 1731 #ACE print "added domain for $domain_2{$peptide}\n"; |
1710 } | 1732 } |
1711 } elsif (exists($domain_2{$peptide}) and (not defined $domain_2{$peptide})) { | 1733 } elsif (exists($domain_2{$peptide}) and (not defined $domain_2{$peptide})) { |
1712 print "\$domain_2{$peptide} is undefined\n"; #ACE | 1734 print "\$domain_2{$peptide} is undefined\n"; #ACE |
1713 } | 1735 } |
1714 } | 1736 } |
1715 | 1737 |
1716 $dbh->{AutoCommit} = $auto_commit; | 1738 $dbh->{AutoCommit} = $auto_commit; |
1717 # auto_commit implicitly finishes stmth, apparently # $stmth->finish; | 1739 # auto_commit implicitly finishes psp_regulatory_site_stmth, apparently # $psp_regulatory_site_stmth->finish; |
1718 $dbh->disconnect if ( defined $dbh ); | 1740 $dbh->disconnect if ( defined $dbh ); |
1719 | 1741 |
1720 | 1742 |
1721 ($end_seconds, $end_microseconds) = gettimeofday; | 1743 ($end_seconds, $end_microseconds) = gettimeofday; |
1722 | 1744 |
1870 # column 1: p-peptide | 1892 # column 1: p-peptide |
1871 print OUT "$peptide\t"; | 1893 print OUT "$peptide\t"; |
1872 push (@ppep_metadata, $ppep_id); | 1894 push (@ppep_metadata, $ppep_id); |
1873 push (@ppep_intensity, $peptide); | 1895 push (@ppep_intensity, $peptide); |
1874 | 1896 |
1897 my $verbose_cond = 0; # $peptide eq 'AAAAAAAGDpSDpSWDADAFSVEDPVR' || $peptide eq 'KKGGpSpSDEGPEPEAEEpSDLDSGSVHSASGRPDGPVR'; | |
1875 # skip over failed matches | 1898 # skip over failed matches |
1876 if ($matched_sequences{$peptide} eq "Failed match") { | 1899 print "\nfirst match for '$peptide' is '$matched_sequences{$peptide}[0]' and FAILED_MATCH_SEQ is '$FAILED_MATCH_SEQ'\n" if $verbose_cond; |
1900 if ($matched_sequences{$peptide}[0] eq $FAILED_MATCH_SEQ) { | |
1901 # column 2: Protein description | |
1902 # column 3: Gene name(s) | |
1903 # column 4: FASTA name | |
1904 # column 5: phospho-residues | |
1905 # Column 6: UNIQUE phospho-motifs | |
1906 # Column 7: accessions | |
1907 # Column 8: ALL motifs with residue numbers | |
1908 # 2 3 4 5 6 7 8 | |
1877 print OUT "Sequence not found in FASTA database\tNA\tNA\tNA\tNA\tNA\tNA\t"; | 1909 print OUT "Sequence not found in FASTA database\tNA\tNA\tNA\tNA\tNA\tNA\t"; |
1910 print "No match found for '$peptide' in sequence database\n"; | |
1911 $gene_names = '$FAILED_MATCH_GENE_NAME'; | |
1878 } else { | 1912 } else { |
1879 my @description = (); | 1913 my @description = (); |
1880 my %seen = (); | 1914 my %seen = (); |
1881 # Print just the protein description | 1915 # Print just the protein description |
1882 for $i (0 .. $#{$names{$peptide}}) { | 1916 for $i (0 .. $#{$names{$peptide}}) { |
1902 # column 3: Gene name(s) | 1936 # column 3: Gene name(s) |
1903 $gene_names = join(" /// ", @gene); | 1937 $gene_names = join(" /// ", @gene); |
1904 print OUT $gene_names, "\t"; | 1938 print OUT $gene_names, "\t"; |
1905 push (@ppep_metadata, join(" /// ", @gene)); | 1939 push (@ppep_metadata, join(" /// ", @gene)); |
1906 | 1940 |
1907 # print the entire names | |
1908 # column 4: FASTA name | 1941 # column 4: FASTA name |
1909 print OUT join(" /// ", @{$names{$peptide}}), "\t"; | 1942 print OUT join(" /// ", @{$names{$peptide}}), "\t"; |
1910 push (@ppep_metadata, join(" /// ", @{$names{$peptide}})); | 1943 push (@ppep_metadata, join(" /// ", @{$names{$peptide}})); |
1911 | 1944 |
1912 # Print the phospho-residues | 1945 # column 5: phospho-residues |
1913 # column 5: | |
1914 my $tmp_for_insert = ""; | 1946 my $tmp_for_insert = ""; |
1947 my $foobar; | |
1915 for my $i (0 .. $#{ $matched_sequences{$peptide} } ) { | 1948 for my $i (0 .. $#{ $matched_sequences{$peptide} } ) { |
1949 print "match $i for '$peptide' is '$matched_sequences{$peptide}[$i]'\n" if $verbose_cond; | |
1916 if ($i < $#{ $matched_sequences{$peptide} }) { | 1950 if ($i < $#{ $matched_sequences{$peptide} }) { |
1917 if (defined $p_residues{$peptide}{$i}) { | 1951 if (defined $p_residues{$peptide}{$i}) { |
1918 @tmp_p_residues = @{$p_residues{$peptide}{$i}}; | 1952 @tmp_p_residues = @{$p_residues{$peptide}{$i}}; |
1919 for $j (0 .. $#tmp_p_residues) { | 1953 for $j (0 .. $#tmp_p_residues) { |
1920 if ($j < $#tmp_p_residues) { | 1954 if ($j < $#tmp_p_residues) { |
1929 } | 1963 } |
1930 } | 1964 } |
1931 } | 1965 } |
1932 } | 1966 } |
1933 elsif ($i == $#{ $matched_sequences{$peptide} }) { | 1967 elsif ($i == $#{ $matched_sequences{$peptide} }) { |
1968 my $there_were_sites = 0; | |
1934 if (defined $p_residues{$peptide}{$i}) { | 1969 if (defined $p_residues{$peptide}{$i}) { |
1935 @tmp_p_residues = @{$p_residues{$peptide}{$i}}; | 1970 @tmp_p_residues = @{$p_residues{$peptide}{$i}}; |
1936 for my $j (0 .. $#tmp_p_residues) { | 1971 if ($#tmp_p_residues > 0) { |
1937 if ($j < $#tmp_p_residues) { | 1972 for my $j (0 .. $#tmp_p_residues) { |
1938 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data | 1973 if ($j < $#tmp_p_residues) { |
1939 print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, "; | 1974 if (defined $p_residues{$peptide}{$i}[$j]) { |
1940 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, "; | 1975 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data |
1976 $foobar = $residues{$peptide}{$i}[$j]; | |
1977 if (defined $foobar) { | |
1978 print OUT "$foobar"; | |
1979 print OUT "$tmp_site_for_printing, "; | |
1980 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, "; | |
1981 $there_were_sites = 1; | |
1982 } | |
1983 } | |
1984 } | |
1985 elsif ($j == $#tmp_p_residues) { | |
1986 if (defined $p_residues{$peptide}{$i}[$j]) { | |
1987 $foobar = $residues{$peptide}{$i}[$j]; | |
1988 if (defined $foobar) { | |
1989 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data | |
1990 print OUT "$foobar"; | |
1991 print OUT "$tmp_site_for_printing\t"; | |
1992 #ACE print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\t"; | |
1993 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing"; | |
1994 $there_were_sites = 1; | |
1995 } | |
1996 } | |
1997 } | |
1941 } | 1998 } |
1942 elsif ($j == $#tmp_p_residues) { | 1999 } |
1943 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data | 2000 } |
1944 print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\t"; | 2001 if (0 == $there_were_sites) { |
1945 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing"; | |
1946 } | |
1947 } | |
1948 } else { | |
1949 print OUT "\t"; | 2002 print OUT "\t"; |
1950 } | 2003 } |
1951 } | 2004 } |
1952 } | 2005 } |
2006 print "tmp_for_insert '$tmp_for_insert' for '$peptide'\n" if $verbose_cond; | |
1953 push (@ppep_metadata, $tmp_for_insert); | 2007 push (@ppep_metadata, $tmp_for_insert); |
1954 | 2008 |
1955 # Print the UNIQUE phospho-motifs | 2009 # Column 6: UNIQUE phospho-motifs |
1956 # Column 6: | |
1957 print OUT join(" /// ", @{$unique_motifs{$peptide}}), "\t"; | 2010 print OUT join(" /// ", @{$unique_motifs{$peptide}}), "\t"; |
1958 push (@ppep_metadata, join(" /// ", @{$unique_motifs{$peptide}})); | 2011 push (@ppep_metadata, join(" /// ", @{$unique_motifs{$peptide}})); |
1959 | 2012 |
1960 # Print the accessions | 2013 # Column 7: accessions |
1961 # Column 7: | |
1962 if (defined $accessions{$peptide}) { | 2014 if (defined $accessions{$peptide}) { |
1963 print OUT join(" /// ", @{$accessions{$peptide}}), "\t"; | 2015 print OUT join(" /// ", @{$accessions{$peptide}}), "\t"; |
1964 push (@ppep_metadata, join(" /// ", @{$accessions{$peptide}})); | 2016 push (@ppep_metadata, join(" /// ", @{$accessions{$peptide}})); |
1965 } else { | 2017 } else { |
1966 print OUT "\t"; | 2018 print OUT "\t"; |
1967 push (@ppep_metadata, ""); | 2019 push (@ppep_metadata, ""); |
1968 } | 2020 } |
1969 | 2021 |
1970 # print ALL motifs with residue numbers | 2022 # Column 8: ALL motifs with residue numbers |
1971 # Column 8: | |
1972 if (defined $p_motifs{$peptide}) { | 2023 if (defined $p_motifs{$peptide}) { |
1973 print OUT join(" /// ", @{$p_motifs{$peptide}}), "\t"; | 2024 print OUT join(" /// ", @{$p_motifs{$peptide}}), "\t"; |
1974 push (@ppep_metadata, join(" /// ", @{$p_motifs{$peptide}})); | 2025 push (@ppep_metadata, join(" /// ", @{$p_motifs{$peptide}})); |
1975 } else { | 2026 } else { |
1976 print OUT "\t"; | 2027 print OUT "\t"; |