annotate PhosphoPeptide_Upstream_Kinase_Mapping.pl @ 19:d7f27d0fca7e draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 13c001168047caa2eefb6b37727ba8974e54a025"
author eschen42
date Wed, 06 Apr 2022 18:07:47 +0000
parents 196b84357e7e
children ee7dedf9c79b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1 #!/usr/local/bin/perl
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
3 # perl Kinase_enrichment_analysis_complete_v0.pl
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
4 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
5 # Nick Graham, USC
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
6 # 2016-02-27
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
7 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
8 # Built from scripts written by NG at UCLA in Tom Graeber's lab:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
9 # CombinePhosphoSites.pl
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
10 # Retrieve_p_motifs.pl
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
11 # NetworKIN_Motif_Finder_v7.pl
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
12 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
13 # Given a list of phospho-peptides, find protein information and upstream kinases.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
14 # Output file can be used for KS enrichment score calculations using Enrichment_Score4Directory.pl
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
15 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
16 # Updated 2022-01-13, Art Eschenlauer, UMN on behalf of Justin Drake's lab:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
17 # Added warnings and used strict;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
18 # fixed some code paths resulting in more NetworKIN matches;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
19 # applied Aho-Corasick algorithm (via external Python script because Perl implementation was still too slow)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
20 # to speed up "Match the non_p_peptides to the @sequences array";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
21 # added support for SQLite-formatted UniProtKB/Swiss-Prot data as an alternative to FASTA-formatted data;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
22 # added support for SQLite output in addition to tabular files.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
23 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
24 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
25 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
26
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
27 use strict;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
28 use warnings 'FATAL' => 'all';
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
29
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
30 use Getopt::Std;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
31 use DBD::SQLite::Constants qw/:file_open/;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
32 use DBI qw(:sql_types);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
33 use File::Copy;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
34 use File::Basename;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
35 use POSIX qw(strftime);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
36 use Time::HiRes qw(gettimeofday);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
37 #use Data::Dump qw(dump);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
38
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# File-scope lexicals shared by the rest of the script.
# Only six of them carry an initial value ($USE_SEARCH_PPEP_PY,
# $FAILED_MATCH_SEQ, $FAILED_MATCH_GENE_NAME, $dirname, $line and
# @failed_match); every other variable starts out undefined / empty.

# NOTE(review): presumably toggles the external Python peptide search
# mentioned in the file header - confirm against the use sites.
my $USE_SEARCH_PPEP_PY = 1;

# Placeholder strings; a previous value is kept here for reference.
#my $FAILED_MATCH_SEQ = "Failed match";
my $FAILED_MATCH_SEQ       = 'No Sequence';
my $FAILED_MATCH_GENE_NAME = 'No_Gene_Name';

# Directory portion of this script's own path.
my $dirname = dirname(__FILE__);

# Option storage and per-option values.
# NOTE(review): presumably filled via Getopt::Std - the call is not visible here.
my %opts;
my (
    $file_in,
    $average_or_sum,
    $db_out,
    $file_out,
    $file_melt,
    $phospho_type
);
my $dbtype;
my (
    $fasta_in,
    $networkin_in,
    $motifs_in,
    $PSP_Kinase_Substrate_in,
    $PSP_Regulatory_Sites_in
);

my (
    @samples,
    %sample_id_lut,
    %ppep_id_lut,
    %data,
    @tmp_data,
    %n
);
my $line = 0;

# Starts as a one-element list holding the failed-match placeholder.
my @failed_match = ($FAILED_MATCH_SEQ);
my @failed_matches;
my %all_data;
my (
    @p_peptides,
    @non_p_peptides
);
my @parsed_fasta;
my (
    @accessions,
    @names,
    @sequences,
    @databases,
    $database
);
my (
    $dbfile,
    $dbh,
    $stmth
);
my @col_names;

# %accessions / %names are distinct variables from @accessions / @names above.
my (
    %matched_sequences,
    %accessions,
    %names,
    %sites
);
my (
    @tmp_matches,
    @tmp_accessions,
    @tmp_names,
    @tmp_sites
);
my (
    %p_residues,
    @tmp_p_residues,
    @p_sites,
    $left,
    $right,
    %p_motifs,
    @tmp_motifs_array,
    $tmp_motif,
    $tmp_site,
    %residues
);
my (
    @kinases_observed,
    $kinases
);
my (
    @kinases_observed_lbl,
    @phosphosites_observed_lbl
);
my (
    $p_sequence_kinase,
    $p_sequence,
    $kinase
);
my (
    @motif_sequence,
    %motif_type,
    %motif_count
);
my (
    @kinases_PhosphoSite,
    $kinases_PhosphoSite
);
my (
    $p_sequence_kinase_PhosphoSite,
    $p_sequence_PhosphoSite,
    $kinase_PhosphoSite
);
my %regulatory_sites_PhosphoSite_hash;
my (
    %domain,
    %ON_FUNCTION,
    %ON_PROCESS,
    %ON_PROT_INTERACT,
    %ON_OTHER_INTERACT,
    %notes,
    %organism
);
my %unique_motifs;

# Scalars here; hashes with the same base names are declared further down.
my (
    $kinase_substrate_NetworKIN_matches,
    $kinase_motif_matches,
    $kinase_substrate_PhosphoSite_matches
);
my %psp_regsite_protein_2;

# NOTE(review): %N_PROT_INTERACT looks like a typo of %ON_PROT_INTERACT;
# kept because its use sites are not visible here - confirm before removing.
my (
    %domain_2,
    %ON_FUNCTION_2,
    %ON_PROCESS_2,
    %ON_PROT_INTERACT_2,
    %N_PROT_INTERACT,
    %ON_OTHER_INTERACT_2,
    %notes_2,
    %organism_2
);
my @timeData;
my $PhosphoSitePlusCitation;
my %site_description;

my %kinase_substrate_NetworKIN_matches;
my %kinase_motif_matches;
my $regulatory_sites_PhosphoSite;
my (
    $seq_plus5aa,
    $seq_plus7aa,
    %seq_plus7aa_2
);
my %kinase_substrate_PhosphoSite_matches;
my @formatted_sequence;
my $pSTY_sequence;
my $i;
my @a;
my $use_sqlite;
my $verbose;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
89
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
90 ##########
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
91 ## opts ##
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
92 ##########
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
93 ## input files
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
94 # i : path to input file, e.g., 'outputfile_STEP2.txt'
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
95 # f : path to UniProtKB/SwissProt FASTA
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
96 # s : optional species filter argument for PSP records; defaults to 'human'
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
97 # n : path to NetworKIN_201612_cutoffscore2.0.txt
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
98 # m : path to pSTY_Motifs.txt
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
99 # p : path to 2017-03_PSP_Kinase_Substrate_Dataset.txt
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
100 # r : path to 2017-03_PSP_Regulatory_sites.txt
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
101 ## options
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
102 # P : phospho_type
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
103 # F : function
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
104 # v : verbose output
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
105 ## output files
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
106 # o : path to output file
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
107 # O : path to "melted" output file
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
108 # D : path to output SQLite file
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
109
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# usage - print the command-line help text to STDERR and terminate.
#
# Takes no arguments and never returns: once the message has been written
# it calls exit with no argument, so the process ends with status 0.
sub usage()
{
    # Interpolating heredoc: $0 expands to the name the script was run as.
    # The flag list in the synopsis matches the switches documented
    # below it (-h, -v, -a).
    print STDERR <<"EOH";
    Given a list of phospho-peptides, this program finds protein information and upstream kinases.
    usage: $0 [-hva] -f FASTA_file
     -h : this (help) message
     -v : slightly verbose
     -a : use SQLite less
     ## input files
     -i : path to input file, e.g., 'outputfile_STEP2.txt'
     -f : path to UniProtKB/SwissProt FASTA
     -s : optional species filter argument for PSP records; defaults to 'human'
     -n : path to NetworKIN_201612_cutoffscore2.0.txt
     -m : path to pSTY_Motifs.txt
     -p : path to 2017-03_PSP_Kinase_Substrate_Dataset.txt
     -r : path to 2017-03_PSP_Regulatory_sites.txt
     ## options
     -P : phospho_type
     -F : function
     ## output files
     -o : path to output file
     -O : path to "melted" output file
     -D : path to output SQLite file
    example: $0
EOH
    exit;
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
137
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
sub format_localtime_iso8601 {
    # Return the current local time as "YYYY-MM-DDTHH:MM:SS.mmm"
    # (millisecond resolution, no time-zone designator).
    #
    # Takes no arguments.
    #
    # ref: https://perldoc.perl.org/Time::HiRes
    # Sample the clock exactly once; both the whole-second part and the
    # fractional part below are derived from this single reading so they
    # can never disagree across a second boundary.
    my ($seconds, $microseconds) = gettimeofday();
    # ref: https://pubs.opengroup.org/onlinepubs/9699919799/functions/strftime.html
    my $date_and_time = strftime("%Y-%m-%dT%H:%M:%S", localtime($seconds));
    # Truncate (do not round) microseconds to milliseconds so the
    # fraction stays within 000..999.
    my $milliseconds = int($microseconds / 1000);
    return $date_and_time . sprintf(".%03d", $milliseconds);
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
144
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
sub replace_pSpTpY {
    # Rewrite the "pS", "pT" and "pY" markers in a sequence string.
    #
    # Arguments:
    #   $formatted_sequence - string that may contain pS / pT / pY
    #   $phospho_type       - 'y'   : pS -> S, pT -> T, pY -> y
    #                         'sty' : pS -> s, pT -> t, pY -> y
    # Returns the rewritten string; for any other $phospho_type the
    # sequence is returned unchanged.
    my ($formatted_sequence, $phospho_type) = @_;

    # Replacement residue for each marker, keyed by phospho_type.
    my %residue_for = (
        'y'   => { S => 'S', T => 'T', Y => 'y' },
        'sty' => { S => 's', T => 't', Y => 'y' },
    );

    # Nothing to do (and nothing to warn about) when either input is
    # undefined or the phospho_type is not one of the known kinds.
    return $formatted_sequence
        unless defined $formatted_sequence && defined $phospho_type;
    my $replacement = $residue_for{$phospho_type};
    return $formatted_sequence unless defined $replacement;

    # One left-to-right pass handles all three markers; a marker's
    # replacement never creates a new pS/pT/pY, so this gives the same
    # result as three consecutive global substitutions.
    $formatted_sequence =~ s/p([STY])/$replacement->{$1}/g;
    return $formatted_sequence;
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
159
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
sub pseudo_sed
{
    # pseudo_sed produces "UniProt_ID\tDescription\tOS\tOX\tGN\tPE\tSV"
    # from a header shaped like
    #   "sp|ID|NAME description OS=... OX=... GN=... PE=... SV=..."
    # Comments starting with "sed:" give the sed equivalent of a step.
    #
    # Argument: the header text; a copy is edited, the argument is not.
    # Returns:  the tab-separated record.  Defaults for absent tags are
    #           "N/A" for OS, GN, PE and SV, and "-1" for OX.
    my ($t) = @_;
    my $s = $t;

    # Complete the OX -> GN -> PE -> SV chain: once one of these tags is
    # present, every later tag is made present too (with value N/A).
    # sed: / GN=/!{ s:\(OX=[^ \t]*\):\1 GN=N/A:; };
    $s =~ s:(OX=[^ \t]*):${1} GN=N/A: unless $s =~ m: GN=:;
    # sed: / PE=/!{ s:\(GN=[^ \t]*\):\1 PE=N/A:; };
    $s =~ s:(GN=[^ \t]*):${1} PE=N/A: unless $s =~ m: PE=:;
    # sed: / SV=/!{ s:\(PE=[^ \t]*\):\1 SV=N/A:; };
    $s =~ s:(PE=[^ \t]*):${1} SV=N/A: unless $s =~ m: SV=:;

    # sed: s/^sp.//;
    # Note: this removes the first three characters whatever they are.
    $s =~ s/^...//s;
    # sed: s/[|]/\t/g;
    $s =~ s/[|]/\t/g;

    if ($s !~ m/ (?:OX|GN|PE|SV)=/) {
        # None of OX, GN, PE, SV is present.
        # OS= is used elsewhere, but it's not helpful without OX and GN
        $s =~ s/OS=/Species /g;
        # supply sensible default values
        $s .= "\tN/A\t-1\tN/A\tN/A\tN/A";
        return $s;
    }

    # Turn each " TAG=" into a field separator, in output-column order.
    # When a tag is absent its default must become its own column in the
    # tag's position: i.e. directly in front of the next tag that is
    # still waiting to be converted (or at the end if none remains).
    # Inserting it in front of the *previous* field instead would swap
    # neighbouring columns (e.g. OS and OX for headers lacking OX=).
    my @columns = (
        [ 'OS', 'N/A' ],
        [ 'OX', '-1'  ],
        [ 'GN', 'N/A' ],
        [ 'PE', 'N/A' ],
        [ 'SV', 'N/A' ],
    );
    for my $i (0 .. $#columns) {
        my ($tag, $default) = @{ $columns[$i] };

        # sed: s/ OS=/\t/;  (likewise for OX, GN, PE, SV)
        next if $s =~ s/ ${tag}=/\t/;

        # Tag absent: look for the first tag that still follows.
        my $later_tags =
            join '|', map { " $_->[0]=" } @columns[ $i + 1 .. $#columns ];
        next if $later_tags ne ''
             && $s =~ s/(?=$later_tags)/\t$default/;

        # No later tag left in the text; the default is the last column.
        $s .= "\t$default";
    }
    return $s;
} # sub pseudo_sed
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
208
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Parse the command line into %opts.  Letters followed by ':' take a
# value; h, v and a are plain flags.
getopts('i:f:s:n:m:p:r:P:F:o:O:D:hva', \%opts);

# -h : show the help text
usage() if exists($opts{'h'});

# -a : turn off $USE_SEARCH_PPEP_PY
$USE_SEARCH_PPEP_PY = 0 if exists($opts{'a'});

# -v : verbose flag, always set to either 1 or 0
$verbose = exists($opts{'v'}) ? 1 : 0;

# -i : input file; must be supplied and must exist
die('Input File not found')
    unless (exists($opts{'i'}) && -e $opts{'i'});
$file_in = $opts{'i'};

# -f : FASTA file; must be supplied and must exist
die('FASTA not found')
    unless (exists($opts{'f'}) && -e $opts{'f'});
$fasta_in   = $opts{'f'};
$use_sqlite = 0;

# -s : species filter; 'human' when absent or empty
my $species;
if (exists($opts{'s'}) && $opts{'s'} ne '') {
    $species = $opts{'s'};
    print "'-s' option is '$species'\n";
}
else {
    $species = 'human';
}
print "species filter is '$species'\n";

# -n : NetworKIN file; must be supplied and must exist
die('Input NetworKIN File not found')
    unless (exists($opts{'n'}) && -e $opts{'n'});
$networkin_in = $opts{'n'};

# -m : pSTY motifs file; must be supplied and must exist
die('Input pSTY_Motifs File not found')
    unless (exists($opts{'m'}) && -e $opts{'m'});
$motifs_in = $opts{'m'};

# -p : PSP kinase-substrate dataset; must be supplied and must exist
die('Input PSP_Kinase_Substrate_Dataset File not found')
    unless (exists($opts{'p'}) && -e $opts{'p'});
$PSP_Kinase_Substrate_in = $opts{'p'};

# -r : PSP regulatory sites file; must be supplied and must exist
die('Input PSP_Regulatory_sites File not found')
    unless (exists($opts{'r'}) && -e $opts{'r'});
$PSP_Regulatory_Sites_in = $opts{'r'};

# -P : phospho_type, "sty" when not given
$phospho_type = exists($opts{'P'}) ? $opts{'P'} : "sty";

# -F : function, "sum" when not given
$average_or_sum = exists($opts{'F'}) ? $opts{'F'} : "sum";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
275 if (exists($opts{'D'})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
276 $db_out = $opts{'D'};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
277 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
278 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
279 $db_out = "db_out.sqlite";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
280 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
281 if (exists($opts{'O'})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
282 $file_melt = $opts{'O'};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
283 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
284 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
285 $file_melt = "output_melt.tsv";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
286 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
287 if (exists($opts{'o'})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
288 $file_out = $opts{'o'};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
289 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
290 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
291 $file_out = "output.tsv";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
292 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
293
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
294
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
###############################################################################################################################
# Print the relevant file names to the screen
###############################################################################################################################
# print "\nData file: $data_in\nFASTA file: $fasta_in\nSpecies: $species\nOutput file: $motifs_out\n\n";
print "\n--- parameters:\n";
print "Data file: $file_in\nAverage or sum identical p-sites? $average_or_sum\nOutput file: $file_out\nMelted map: $file_melt\n";

# Only the protein-sequence source line differs between the two modes, so build
# it once and emit a single summary; only the selected branch is interpolated.
my $sequence_source_line = ($use_sqlite == 0)
    ? "UniProtKB/SwissProt FASTA file: $fasta_in"
    : "UniProtKB/SwissProt SQLIte file: $dbfile";

# The NetworKIN entry previously printed the literal text "networkin_in" because
# the sigil was missing.
# NOTE(review): assumes $networkin_in is the variable set from the NetworKIN
# option earlier in the script - confirm the name against its declaration.
print "Motifs file: $motifs_in\nNetworKIN file: $networkin_in\nPhosphosite kinase substrate data: $PSP_Kinase_Substrate_in\nPhosphosite regulatory site data: $PSP_Regulatory_Sites_in\n$sequence_source_line\nOutput SQLite file: $db_out\n";
print "...\n\n";

print "Phospho-residues(s) = $phospho_type\n\n";

# Only 'y' and 'sty' are accepted phospho-types; anything else is a usage error.
if ($phospho_type ne 'y' && $phospho_type ne 'sty') {
    die "\nUsage error:\nYou must choose a phospho-type, either y or sty\n\n";
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
314
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
###############################################################################################################################
# read the input data file
# average or sum identical phospho-sites, depending on the value of $average_or_sum
#
# The input is tab-separated.  The first row supplies the sample names (every
# column after the first); each later row is a phospho-peptide in the first
# column followed by its per-sample values.  This section fills:
#   @samples        sample names, in column order
#   %sample_id_lut  sample name -> its column index in the row (the peptide column is index 0)
#   %data           phospho-peptide -> array ref of per-sample values (combined when the peptide repeats)
#   %n              phospho-peptide -> number of rows folded into the running average
###############################################################################################################################

# Three-argument open with a lexical handle: the file name can never be
# mistaken for an open mode, and the handle cannot collide with other uses of IN.
open(my $in_fh, '<', $file_in) or die "I couldn't find the input file: $file_in\n";

die "\n\nScript died: You must choose either average or sum for \$average_or_sum\n\n" if (($average_or_sum ne "sum") && ($average_or_sum ne "average")) ;

$line = 0;    # stays 0 until the header row has been consumed

while (my $row = <$in_fh>) {
    chomp $row;
    my @x = split(/\t/, $row);
    tr/\r\n"//d for @x;    # drop stray CR/LF characters and double quotes from every field

    # Read in the samples
    if ($line == 0) {
        for my $col (1 .. $#x) {
            push(@samples, $x[$col]);
            $sample_id_lut{$x[$col]} = $col;
        }
        $line++;
        next;
    }

    my $peptide = $x[0];

    # if the phospho-sequence has not been seen, save the data
    if (!exists($data{$peptide})) {
        # Guarded so that a row with no value columns does not create an empty
        # entry in %data (matches the behavior of the per-element push it replaces).
        push(@{$data{$peptide}}, @x[1 .. $#x]) if $#x >= 1;
        $n{$peptide} = 1;
        next;
    }

    # The peptide has been seen before: fold this row into the stored values.
    # The combined values are written back into %data itself.  Previously the
    # "sum" branch stored its result in %all_data (so %data kept only the first
    # row) and the "average" branch read from %all_data, which this section
    # never fills, then replaced the %data entry with the empty result.
    # NOTE(review): %all_data is no longer written here - confirm nothing
    # outside this section reads it.
    my $values = $data{$peptide};
    if ($average_or_sum eq "sum") {    # add the data
        for my $k (0 .. $#{$values}) {
            $values->[$k] += $x[$k + 1];
        }
    } elsif ($average_or_sum eq "average") {    # average the data
        # Running mean: new = (old * count + value) / (count + 1).  The value
        # for column $k is $x[$k + 1]; $x[0] is the peptide string, not a number.
        my $count = $n{$peptide};
        for my $k (0 .. $#{$values}) {
            $values->[$k] = ($values->[$k] * $count + $x[$k + 1]) / ($count + 1);
        }
        $n{$peptide}++;
    }
}
close($in_fh);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
366
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
367
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
###############################################################################################################################
# Search the FASTA database for phospho-sites and motifs
#
# based on Retrieve_p_peptide_motifs_v2.pl
###############################################################################################################################


###############################################################################################################################
#
# Derive two parallel peptide lists from the keys of %data:
#    1) @p_peptides: each lower-case s, t, y rewritten as pS, pT, pY
#    2) @non_p_peptides: the same strings with every lower-case "p" removed
#
###############################################################################################################################

for my $key (keys %data) {
    # A single pass suffices: the replacement (p + upper-case letter) contains
    # no lower-case s, t or y that a later substitution could match again.
    (my $marked = $key) =~ s/([sty])/p\u$1/g;
    push(@p_peptides, $marked);

    # Remove the phospho markers (and any other lower-case p) to get the bare sequence.
    (my $bare = $marked) =~ tr/p//d;
    push(@non_p_peptides, $bare);
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
389
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
390 if ($use_sqlite == 0) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
391 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
392 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
393 # Read in the UniProtKB/Swiss-Prot data from FASTA; save to @sequences array and SQLite output database
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
394 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
395 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
396
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
397 # e.g.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
398 # >sp|Q9Y3B9|RRP15_HUMAN RRP15-like protein OS=Homo sapiens OX=9606 GN=RRP15 PE=1 SV=2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
399 # MAAAAPDSRVSEEENLKKTPKKKMKMVTGAVASVLEDEATDTSDSEGSCGSEKDHFYSDD
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
400 # DAIEADSEGDAEPCDKENENDGESSVGTNMGWADAMAKVLNKKTPESKPTILVKNKKLEK
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
401 # EKEKLKQERLEKIKQRDKRLEWEMMCRVKPDVVQDKETERNLQRIATRGVVQLFNAVQKH
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
402 # QKNVDEKVKEAGSSMRKRAKLISTVSKKDFISVLRGMDGSTNETASSRKKPKAKQTEVKS
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
403 # EEGPGWTILRDDFMMGASMKDWDKESDGPDDSRPESASDSDT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
404 # accession: Q9Y3B9
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
405 # name: RRP15_HUMAN RRP15-like protein OS=Homo sapiens OX=9606 GN=RRP15 PE=1 SV=2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
406 # sequence: MAAAAPDSRVSEEENLKKTPKKKMKMVTGAVASVLEDEATDTSDSEGSCGSEKDHFYSDD DAIEADSEGDAEPCDKENENDGESSVGTNMGWADAMAKVLNKKTPESKPTILVKNKKLEK EKEKLKQERLEKIKQRDKRLEWEMMCRVKPDVVQDKETERNLQRIATRGVVQLFNAVQKH QKNVDEKVKEAGSSMRKRAKLISTVSKKDFISVLRGMDGSTNETASSRKKPKAKQTEVKS EEGPGWTILRDDFMMGASMKDWDKESDGPDDSRPESASDSDT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
407 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
408 # e.g.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
409 # >gi|114939|sp|P00722.2|BGAL_ECOLI Beta-galactosidase (Lactase) cRAP
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
410 # >gi|52001466|sp|P00366.2|DHE3_BOVIN Glutamate dehydrogenase 1, mitochondrial precursor (GDH) cRAP
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
411 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
412 # e.g.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
413 # >zs|P00009.24.AR-V2_1.zs|zs_peptide_0024_AR-V2_1
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
414
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
415
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Read the FASTA file named by $fasta_in.
#
# For every header line (first "|"-separated field begins with ">"):
#   - the first three fields are appended to @databases, @accessions and
#     @names respectively;
#   - the fields are joined with tabs, passed through pseudo_sed, given a
#     trailing tab, and appended to @parsed_fasta.  (Per the original
#     comments, pseudo_sed yields
#     "UniProt_ID\tDescription\tOS\tOX\tGN\tPE\tSV".)
# For every sequence line (first field begins with a word character), the
# first field is appended to the @sequences entry and to the @parsed_fasta
# entry that belong to the most recently read header.
#
# Three-argument open with a lexical handle is used so that a file name
# containing mode characters (">", "|", ...) cannot be interpreted as an
# open mode.
open(my $fasta_fh, '<', $fasta_in)
    or die "I couldn't find $fasta_in: $!\n";
print "Reading FASTA file $fasta_in\n";
while (my $line = <$fasta_fh>) {
    chomp $line;
    my @x = split(/\|/, $line);

    # Lines with more than three "|"-separated fields, e.g.
    #   >gi|114939|sp|P00722.2|BGAL_ECOLI Beta-galactosidase (Lactase) cRAP
    # are reduced to their last three fields, with ">" prepended to the first.
    if (@x > 3) {
        @x = (">" . $x[-3], $x[-2], $x[-1]);
        # NOTE(review): hard-coded diagnostic output for DHE3_BOVIN entries,
        # retained unchanged -- confirm whether it is still wanted.
        if ($line =~ m/DHE3_BOVIN/) {
            print "\$_ = $line\n";
            for my $i (0 .. $#x) {
                print "\$x[$i] = $x[$i]\n";
            }
        }
    }

    # Strip carriage returns, newlines, and double quotes from every field.
    tr/\r\n"//d for @x;

    # An empty line splits to an empty list; there is nothing to record.
    next unless @x;

    if ($x[0] =~ /^>/) {
        # Header line: drop the ">" marker(s) and record the three fields.
        $x[0] =~ s/\>//g;
        push(@databases,  $x[0]);
        push(@accessions, $x[1]);
        push(@names,      $x[2]);
        # Format tags of standard UniProtKB headers as tab-separated values.
        my $parsed = pseudo_sed(join "\t", (">" . $x[0], $x[1], $x[2]));
        # Append a tab as separator between header and sequence.
        $parsed =~ s/$/\t/;
        push(@parsed_fasta, $parsed);
    }
    elsif ($x[0] =~ /^\w/) {
        # Sequence line: it must belong to a previously read header.
        # Without this guard the index below would be -1 and Perl would die
        # with "Modification of non-creatable array value attempted".
        if (@accessions == 0) {
            die "Sequence data precedes the first header line in $fasta_in (line $.)\n";
        }
        my $current = $#accessions;
        # ".=" on a not-yet-defined element starts a new string silently.
        $sequences[$current]    .= $x[0];
        $parsed_fasta[$current] .= $x[0];
    }
}
close $fasta_fh;
print "Done Reading FASTA file $fasta_in\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Connect to the SQLite database file named by $db_out and turn AutoCommit
# off on the handle; the previous AutoCommit setting is kept in $auto_commit.
$dbfile = $db_out;
print "Begin writing $dbfile at " . format_localtime_iso8601() . "\n";
# DBI->connect returns undef on failure; stop here with the driver's message
# instead of failing on the next line with "Can't use an undefined value".
$dbh = DBI->connect("dbi:SQLite:$dbfile", undef, undef)
    or die "Couldn't connect to SQLite database $dbfile: $DBI::errstr\n";
my $auto_commit = $dbh->{AutoCommit};
print "auto_commit was $auto_commit and is now 0\n" if ($verbose);
$dbh->{AutoCommit} = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
479
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
480 # begin DDL-to-SQLite
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
481 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# (Re)create the UniProtKB table and its two indexes.
#
# The statements run in order: drop any existing table, create the table,
# create the unique index on Uniprot_ID, create the index on Gene_Name.
# Each prepare and execute is checked, so a failed DDL statement stops the
# run here rather than letting later steps continue against a missing table.
for my $ddl (
    "\nDROP TABLE IF EXISTS UniProtKB;\n",

    "\nCREATE TABLE UniProtKB (\n"
      . "Uniprot_ID TEXT PRIMARY KEY ON CONFLICT IGNORE,\n"
      . "Description TEXT,\n"
      . "Organism_Name TEXT,\n"
      . "Organism_ID INTEGER,\n"
      . "Gene_Name TEXT,\n"
      . "PE TEXT,\n"
      . "SV TEXT,\n"
      . "Sequence TEXT,\n"
      . "Database TEXT\n"
      . ")\n",

    "\nCREATE UNIQUE INDEX idx_uniq_UniProtKB_0 on UniProtKB(Uniprot_ID);\n",

    "\nCREATE INDEX idx_UniProtKB_0 on UniProtKB(Gene_Name);\n",
) {
    $stmth = $dbh->prepare($ddl)
        or die "Couldn't prepare statement:$ddl" . $dbh->errstr . "\n";
    $stmth->execute()
        or die "Couldn't execute statement:$ddl" . $stmth->errstr . "\n";
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
509 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
510 # end DDL-to-SQLite
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
511
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
512 # insert all rows
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
513 # begin store-to-SQLite "UniProtKB" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
514 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Prepare the nine-placeholder INSERT used to store rows in UniProtKB; the
# handle is left in $stmth.  The placeholders correspond, in order, to the
# columns listed in the statement.
# prepare returns undef on failure; report the driver's message here rather
# than failing later on a method call against an undefined handle.
$stmth = $dbh->prepare("
INSERT INTO UniProtKB (
Uniprot_ID,
Description,
Organism_Name,
Organism_ID,
Gene_Name,
PE,
SV,
Sequence,
Database
) VALUES (?,?,?,?,?,?,?,?,?)
") or die "Couldn't prepare INSERT INTO UniProtKB: " . $dbh->errstr . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
528 my $row_count = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
529 my $row_string;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
530 my (@row, @rows);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
531 my $wrd;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
532 while ( scalar @parsed_fasta > 0 ) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
533 $database = $databases[$#parsed_fasta];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
534 # row_string gets "UniProt_ID\tDescription\tOS\tOX\tGN\tPE\tSV\t"
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
535 # 1 2 3 4 5 6 7 sequence database
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
536 $row_string = pop(@parsed_fasta);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
537 @row = (split /\t/, $row_string);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
538 if ((not exists($row[4])) || ($row[4] eq "")) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
539 die("invalid fasta line\n$row_string\n");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
540 };
18
196b84357e7e "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 0577d987a208026d9fc94449595a6a1af18ce317"
eschen42
parents: 17
diff changeset
541 if ($row[4] eq "N/A") {
196b84357e7e "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 0577d987a208026d9fc94449595a6a1af18ce317"
eschen42
parents: 17
diff changeset
542 print "Organism_ID is 'N/A' for row $row_count:\n'$row_string'\n";
196b84357e7e "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 0577d987a208026d9fc94449595a6a1af18ce317"
eschen42
parents: 17
diff changeset
543 $row[4] = -1;
196b84357e7e "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 0577d987a208026d9fc94449595a6a1af18ce317"
eschen42
parents: 17
diff changeset
544 };
17
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
545 for $i (1..3,5..8) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
546 #BIND print "bind_param $i, $row[$i]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
547 $stmth->bind_param($i, $row[$i]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
548 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
549 #BIND print "bind_param 4, $row[4]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
550 $stmth->bind_param(9, $database);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
551 #BIND print "bind_param 4, $row[4]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
552 $stmth->bind_param(4, $row[4], { TYPE => SQL_INTEGER });
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
553 if (not $stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
554 print "Error in row $row_count: " . $dbh->errstr . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
555 print "Row $row_count: $row_string\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
556 print "Row $row_count: " . join('@', split(/\t/, $row_string, -1)) . "\n"; # show the row with tabs rendered as '@'; s///g would yield the substitution count (and mutate $row_string)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
557 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
558 if (0 && $database ne "zs") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
559 print "row_count: $row_count\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
560 #### print "row_string: $row_string\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
561 print "Row $row_count: $row_string\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
562 for $i (1..3,5..8) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
563 print "bind_param $i, $row[$i]\n" if (exists($row[$i]));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
564 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
565 print "bind_param 4, $row[4]\n" if (exists($row[4]));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
566 print "bind_param 9, $database\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
567 };
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
568 $row_count += 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
569 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
570 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
571 # end store-to-SQLite "UniProtKB" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
572
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
573 print "begin commit at " . format_localtime_iso8601() . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
574 $dbh->{AutoCommit} = $auto_commit;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
575 print "auto_commit is now $auto_commit\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
576 $dbh->disconnect if ( defined $dbh );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
577 print "Finished writing $dbfile at " . format_localtime_iso8601() . "\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
578 $dbtype = "FASTA";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
579 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
580
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
581 if ($use_sqlite == 1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
582 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
583 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
584 # Read in the UniProtKB/Swiss-Prot data from SQLite; save to @sequences array
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
585 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
586 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
587
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
588 copy($dbfile, $db_out) or die "Copy $dbfile to $db_out failed: $!";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
589
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
590 # https://metacpan.org/pod/DBD::SQLite#Read-Only-Database
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
591 $dbh = DBI->connect("dbi:SQLite:$dbfile", undef, undef, {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
592 sqlite_open_flags => SQLITE_OPEN_READONLY,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
593 }) or die "Cannot open $dbfile read-only: $DBI::errstr"; # connect returns undef on failure; stop here with DBI's reason rather than on a later method call on an undefined handle
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
594 print "DB connection $dbh is to $dbfile\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
595
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
596 # Uniprot_ID, Description, Organism_Name, Organism_ID, Gene_Name, PE, SV, Sequence
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
597 $stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
598 SELECT Uniprot_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
599 , Description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
600 || CASE WHEN Organism_Name = 'N/A' THEN '' ELSE ' OS=' || Organism_Name END
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
601 || CASE WHEN Organism_ID = -1 THEN '' ELSE ' OX=' || Organism_ID END
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
602 || CASE WHEN Gene_Name = 'N/A' THEN '' ELSE ' GN=' || Gene_Name END
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
603 || CASE WHEN PE = 'N/A' THEN '' ELSE ' PE=' || PE END
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
604 || CASE WHEN SV = 'N/A' THEN '' ELSE ' SV=' || SV END
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
605 AS Description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
606 , Sequence
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
607 , Database
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
608 FROM
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
609 UniProtKB
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
610 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
611 $stmth->execute();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
612 @col_names = @{$stmth->{NAME}};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
613 print "\nColumn names selected from UniProtKB SQLite table: " . join(", ", @col_names) . "\n\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
614 while (my @row = $stmth->fetchrow_array) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
615 push (@names, $row[1]); # redacted Description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
616 push (@accessions, $row[0]); # Uniprot_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
617 $sequences[$#accessions] = $row[2]; # Sequence
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
618 push (@databases, $row[3]); # Database (should be 'sp')
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
619 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
620
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
621 $dbh->disconnect if ( defined $dbh );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
622
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
623 print "Done Reading UniProtKB/Swiss-Prot file $dbfile\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
624 $dbtype = "SQLite";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
625 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
626
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
627 print scalar(@accessions) . " accessions were read from the UniProtKB/Swiss-Prot $dbtype file\n"; # report the element count; $#accessions is the last index (count - 1)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
628
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
629 ######################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
630 $dbh = DBI->connect("dbi:SQLite:$dbfile", undef, undef) or die "Cannot open $dbfile: $DBI::errstr"; # connect returns undef on failure; stop with DBI's reason instead of failing later on an undefined handle
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
631 $stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
632 INSERT INTO UniProtKB (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
633 Uniprot_ID,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
634 Description,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
635 Organism_Name,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
636 Organism_ID,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
637 Gene_Name,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
638 PE,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
639 SV,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
640 Sequence,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
641 Database
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
642 ) VALUES (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
643 'No Uniprot_ID',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
644 'NO_GENE_SYMBOL No Description',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
645 'No Organism_Name',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
646 0,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
647 '$FAILED_MATCH_GENE_NAME',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
648 '0',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
649 '0',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
650 '$FAILED_MATCH_SEQ',
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
651 'No Database'
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
652 )
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
653 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
654 if (not $stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
655 print "Error inserting dummy row into UniProtKB: " . $stmth->errstr . "\n"; # method calls are not interpolated inside double-quoted strings, so errstr must be concatenated
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
656 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
657 $dbh->disconnect if ( defined $dbh );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
658 ######################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
659
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
660 @timeData = localtime(time);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
661 print "\n--- Start search at " . format_localtime_iso8601() ."\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
662
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
663 print " --> Calling 'search_ppep' script\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
664 if ($verbose) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
665 $i = system("python", "$dirname/search_ppep.py", "-u", $db_out, "-p", $file_in, "--verbose"); # list form bypasses the shell, so paths containing spaces or metacharacters reach the script intact
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
666 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
667 $i = system("python", "$dirname/search_ppep.py", "-u", $db_out, "-p", $file_in); # list form bypasses the shell, so paths containing spaces or metacharacters reach the script intact
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
668 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
669 if ($i) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
670 print "python $dirname/search_ppep.py -u $db_out -p $file_in\n exited with exit code $i\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
671 die "Search failed for phosphopeptides in SwissProt/SQLite file.";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
672 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
673 print " <-- Returned from 'search_ppep' script\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
674
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
675 @timeData = localtime(time);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
676 print "... Finished search at " . format_localtime_iso8601() ."\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
677
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
678
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
679 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
680 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
681 # Match the non_p_peptides to the @sequences array:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
682 # 1) Format the motifs +/- 10 residues around the phospho-site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
683 # 2) Print the original data plus the phospho-motif to the output file
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
684 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
685 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
686
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
687
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
688 print "--- Match the non_p_peptides to the \@sequences array:\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
689
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
690 if ($USE_SEARCH_PPEP_PY) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
691 print "Find the matching protein sequence(s) for the peptide using SQLite\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
692 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
693 print "Find the matching protein sequence(s) for the peptide using slow search\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
694 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
695
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
696 # https://metacpan.org/pod/DBD::SQLite#Read-Only-Database
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
697 $dbh = DBI->connect("dbi:SQLite:$db_out", undef, undef, {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
698 sqlite_open_flags => SQLITE_OPEN_READONLY,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
699 }) or die "Cannot open SQLite database $db_out read-only: $DBI::errstr\n"; # connect returns undef on failure; stop here instead of failing later on an undefined handle
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
700 print "DB connection $dbh is to $db_out\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
701
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
702 # CREATE VIEW uniprotid_pep_ppep AS
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
703 # SELECT deppep_UniProtKB.UniprotKB_ID AS accession
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
704 # , deppep.seq AS peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
705 # , ppep.seq AS phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
706 # , UniProtKB.Sequence AS sequence
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
707 # , UniProtKB.Description AS description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
708 # FROM ppep, deppep, deppep_UniProtKB, UniProtKB
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
709 # WHERE deppep.id = ppep.deppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
710 # AND deppep.id = deppep_UniProtKB.deppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
711 # AND deppep_UniProtKB.UniprotKB_ID = UniProtKB.Uniprot_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
712 # ORDER BY UniprotKB_ID, deppep.seq, ppep.seq;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
713
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
714 my %ppep_to_count_lut;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
715 print "start select peptide counts " . format_localtime_iso8601() . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
716 my $uniprotkb_pep_ppep_view_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
717 SELECT DISTINCT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
718 phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
719 , count(*) as i
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
720 FROM
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
721 uniprotkb_pep_ppep_view
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
722 GROUP BY
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
723 phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
724 ORDER BY
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
725 phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
726 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
727 if (not $uniprotkb_pep_ppep_view_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
728 die "Error fetching peptide counts: " . $uniprotkb_pep_ppep_view_stmth->errstr . "\n"; # errstr is a method call and is not interpolated inside a string, so concatenate it
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
729 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
730 while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
731 $ppep_to_count_lut{$row[0]} = $row[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
732 #print "\$ppep_to_count_lut{$row[0]} = $ppep_to_count_lut{$row[0]}\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
733 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
734
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
735 # accession, peptide, sequence, description, phosphopeptide, long_description, pos_start, pos_end, scrubbed, ppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
736 # 0 1 2 3 4 5 6 7 8 9
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
737 my $COL_ACCESSION = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
738 my $COL_PEPTIDE = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
739 my $COL_SEQUENCE = 2;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
740 my $COL_DESCRIPTION = 3;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
741 my $COL_PHOSPHOPEPTIDE = 4;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
742 my $COL_LONG_DESCRIPTION = 5;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
743 my $COL_POS_START = 6;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
744 my $COL_POS_END = 7;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
745 my $COL_SCRUBBED = 8;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
746 my $COL_PPEP_ID = 9;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
747
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
748 my %ppep_to_row_lut;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
749 print "start select all records without qualification " . format_localtime_iso8601() . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
750 $uniprotkb_pep_ppep_view_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
751 SELECT DISTINCT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
752 accession
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
753 , peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
754 , sequence
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
755 , description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
756 , phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
757 , long_description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
758 , pos_start
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
759 , pos_end
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
760 , scrubbed
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
761 , ppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
762 FROM
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
763 uniprotkb_pep_ppep_view
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
764 ORDER BY
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
765 phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
766 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
767 if (not $uniprotkb_pep_ppep_view_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
768 die "Error fetching all records without qualification: " . $uniprotkb_pep_ppep_view_stmth->errstr . "\n"; # errstr is a method call and is not interpolated inside a string, so concatenate it
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
769 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
770 my $current_ppep;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
771 my $counter = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
772 my $former_ppep = "";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
773 @tmp_matches = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
774 @tmp_accessions = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
775 @tmp_names = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
776 @tmp_sites = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
777 while (my @row = $uniprotkb_pep_ppep_view_stmth->fetchrow_array) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
778 # Identify phosphopeptide for current row;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
779 # it is an error for it to change when the counter is not zero.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
780 $current_ppep = $row[$COL_PHOSPHOPEPTIDE];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
781
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
782 # when counter is zero, prepare for a new phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
783 if (not $current_ppep eq $former_ppep) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
784 die "counter is $counter instead of zero" if ($counter != 0);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
785 $ppep_id_lut{$current_ppep} = $row[$COL_PPEP_ID];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
786 print "next phosphpepetide: $current_ppep; id: $ppep_id_lut{$current_ppep}\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
787 $counter = $ppep_to_count_lut{$current_ppep};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
788 @tmp_matches = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
789 @tmp_accessions = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
790 @tmp_names = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
791 @tmp_sites = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
792 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
793
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
794 if ($USE_SEARCH_PPEP_PY) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
795 push(@tmp_matches, $row[ $COL_SEQUENCE ]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
796 push(@tmp_accessions, $row[ $COL_ACCESSION ]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
797 push(@tmp_names, $row[ $COL_LONG_DESCRIPTION ]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
798 push(@tmp_sites, $row[ $COL_POS_START ]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
799 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
800
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
801 # Prepare counter and phosphopeptide tracker for next row
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
802 $former_ppep = $current_ppep;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
803 $counter -= 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
804
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
805 # Set trackers for later use after last instance of current phosphopeptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
806 if ($counter == 0) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
807 if ($USE_SEARCH_PPEP_PY) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
808 $matched_sequences{$current_ppep} = [ @tmp_matches ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
809 $accessions{ $current_ppep} = [ @tmp_accessions ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
810 $names{ $current_ppep} = [ @tmp_names ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
811 $sites{ $current_ppep} = [ @tmp_sites ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
812 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
813 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
814 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
815
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
816
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
817 print "end select all records without qualification " . format_localtime_iso8601() . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
818
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
819 for my $j (0 .. $#p_peptides) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
820
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
821 #Find the matching protein sequence(s) for the peptide using SQLite
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
822 my ($site, $sequence);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
823 my (@row, @rows);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
824 my $match = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
825 my $p_peptide = $p_peptides[$j];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
826 @tmp_matches = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
827 @tmp_accessions = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
828 @tmp_names = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
829 @tmp_sites = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
830
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
831 #Find the matching protein sequence(s) for the peptide using slow search
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
832 $site = -1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
833 unless ($USE_SEARCH_PPEP_PY) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
834 for my $k (0 .. $#sequences) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
835 $site = index($sequences[$k], $non_p_peptides[$j]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
836 if ($site != -1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
837 push(@tmp_matches, $sequences[$k]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
838 push(@tmp_accessions, $accessions[$k]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
839 push(@tmp_names, $names[$k]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
840 push(@tmp_sites, $site);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
841 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
842 # print "Non-phosphpeptide $non_p_peptides[$j] matched accession $accessions[$k] ($names[$k]) at site $site\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
843 $site = -1; $match++;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
844 # print "tmp_accessions @tmp_accessions \n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
845 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
846 if ($match == 0) { # Check to see if no match was found. Skip to next if no match found.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
847 print "Warning: Failed match for $p_peptides[$j]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
848 $matched_sequences{$p_peptides[$j]} = \@failed_match;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
849 push(@failed_matches,$p_peptides[$j]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
850 next;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
851 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
852 $matched_sequences{$p_peptides[$j]} = [ @tmp_matches ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
853 $accessions{$p_peptides[$j]} = [ @tmp_accessions ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
854 $names{$p_peptides[$j]} = [ @tmp_names ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
855 $sites{$p_peptides[$j]} = [ @tmp_sites ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
856 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
857 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
858
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
859 } # end for my $j (0 .. $#p_peptides)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
860
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
861 print "... Finished match the non_p_peptides at " . format_localtime_iso8601() ."\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
862
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
863 print "--- Match the p_peptides to the \@sequences array:\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
864
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
865 for my $peptide_to_match ( keys %matched_sequences ) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
866 if (grep { $_ eq $peptide_to_match } @failed_matches) { # membership test: grep(EXPR, LIST) with a bare scalar only checked that scalar's truthiness, flagging every peptide once any had failed
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
867 print "Failed to match peptide $peptide_to_match\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
868 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
869 next if (grep { $_ eq $peptide_to_match } @failed_matches); # skip motif extraction only for peptides actually recorded in @failed_matches
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
870 my @matches = @{$matched_sequences{$peptide_to_match}};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
871 @tmp_motifs_array = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
872 for my $i (0 .. $#matches) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
873
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
874 # Find the location of the phospho-site in the sequence(s)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
875 $tmp_site = 0; my $offset = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
876 my $tmp_p_peptide = $peptide_to_match;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
877 $tmp_p_peptide =~ s/#//g; $tmp_p_peptide =~ s/\d//g; $tmp_p_peptide =~ s/\_//g; $tmp_p_peptide =~ s/\.//g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
878
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
879 # Find all phosphorylated residues in the p_peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
880 @p_sites = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
881 while ($tmp_site != -1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
882 $tmp_site = index($tmp_p_peptide, 'p', $offset);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
883 if ($tmp_site != -1) {push (@p_sites, $tmp_site);}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
884 $offset = $tmp_site + 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
885 $tmp_p_peptide =~ s/p//;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
886 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
887 @tmp_p_residues = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
888 for my $l (0 .. $#p_sites) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
889 next if not defined $sites{$peptide_to_match}[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
890
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
891 push (@tmp_p_residues, $p_sites[$l] + $sites{$peptide_to_match}[$i]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
892
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
893 # Match the sequences around the phospho residues to find the motifs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
894 my ($desired_residues_L, $desired_residues_R);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
895 if ($tmp_p_residues[0] - 10 < 0) { #check to see if there are fewer than 10 residues left of the first p-site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
896 # eg, XXXpYXX want $desired_residues_L = 3, $p_residues[0] = 3
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
897 $desired_residues_L = $tmp_p_residues[0];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
898 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
899 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
900 $desired_residues_L = 10;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
901 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
902 my $seq_length = length($matched_sequences{$peptide_to_match}[$i]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
903 if ($tmp_p_residues[$#tmp_p_residues] + 10 > $seq_length) { #check to see if there are fewer than 10 residues right of the last p-site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
904 $desired_residues_R = $seq_length - ($tmp_p_residues[$#tmp_p_residues] + 1);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
905 # eg, XXXpYXX want $desired_residues_R = 2, $seq_length = 6, $p_residues[$#p_residues] = 3
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
906 # print "Line 170: seq_length = $seq_length\tp_residue = $p_residues[$#p_residues]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
907 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
908 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
909 $desired_residues_R = 10;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
910 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
911
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
912 my $total_length = $desired_residues_L + $tmp_p_residues[$#tmp_p_residues] - $tmp_p_residues[0] + $desired_residues_R + 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
913 my $arg2 = $tmp_p_residues[0] - $desired_residues_L;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
914 my $arg1 = $matched_sequences{$peptide_to_match}[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
915
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
916 if (($total_length > 0) && (length($arg1) > $arg2 + $total_length - 1)) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
917 $tmp_motif = substr($arg1, $arg2, $total_length);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
918
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
919 # Put the "p" back in front of the appropriate phospho-residue(s).
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
920 my (@tmp_residues, $tmp_position);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
921 for my $m (0 .. $#p_sites) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
922 # print "Line 183: $p_sites[$m]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
923 if ($m == 0) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
924 $tmp_position = $desired_residues_L;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
925 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
926 $tmp_position = $desired_residues_L + $p_sites[$m] - $p_sites[0];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
927 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
928 if ($tmp_position < length($tmp_motif) + 1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
929 push (@tmp_residues, substr($tmp_motif, $tmp_position, 1));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
930 if ($tmp_residues[$m] eq "S") {substr($tmp_motif, $tmp_position, 1, "s");}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
931 if ($tmp_residues[$m] eq "T") {substr($tmp_motif, $tmp_position, 1, "t");}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
932 if ($tmp_residues[$m] eq "Y") {substr($tmp_motif, $tmp_position, 1, "y");}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
933 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
934 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
935
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
936 $tmp_motif =~ s/s/pS/g; $tmp_motif =~ s/t/pT/g; $tmp_motif =~ s/y/pY/g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
937
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
938 # Comment out on 8.10.13 to remove the numbers from motifs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
939 my $left_residue = $tmp_p_residues[0] - $desired_residues_L+1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
940 my $right_residue = $tmp_p_residues[$#tmp_p_residues] + $desired_residues_R+1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
941 $tmp_motif = $left_residue."-[ ".$tmp_motif." ]-".$right_residue;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
942 push(@tmp_motifs_array, $tmp_motif);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
943 $residues{$peptide_to_match}{$i} = [ @tmp_residues ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
944 $p_residues{$peptide_to_match}{$i} = [ @tmp_p_residues ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
945 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
946 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
947 $p_motifs{$peptide_to_match} = [ @tmp_motifs_array ];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
948 } # end for my $i (0 .. $#matches) ### this bracket could be in the wrong place
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
949 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
950
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
951 print "... Finished match the p_peptides to the \@sequences array at " . format_localtime_iso8601() ."\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
952
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
953 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
954 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
955 # Annotate the peptides with the NetworKIN predictions and HPRD / Phosida kinase motifs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
956 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
957 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
958
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
959
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
960 print "--- Reading various site data:\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
961
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
962 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
963 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
964 # Read the NetworKIN_predictions file:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
965 # 1) make a "kinases_observed" array
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
966 # 2) annotate the phospho-substrates with the appropriate kinase
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
967 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
968 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
969 my $SITE_KINASE_SUBSTRATE = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
970 $site_description{$SITE_KINASE_SUBSTRATE} = "NetworKIN";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
971
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
972 open (IN1, "<", $networkin_in) or die "I couldn't open $networkin_in: $!\n"; # 3-arg open: path is never parsed for a mode; $! reports the real cause
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
973 print "Reading the NetworKIN data: $networkin_in\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
974 while (<IN1>) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
975 chomp;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
976 my (@x) = split(/\t/);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
977 for my $i (0 .. $#x) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
978 $x[$i] =~ s/\r//g; $x[$i] =~ s/\n//g; $x[$i] =~ s/\"//g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
979 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
980 next if (!defined($x[2]) || $x[0] eq "#substrate"); # skip the header and blank/short rows that have no kinase column
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
981 if (exists ($kinases -> {$x[2]})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
982 #do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
983 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
984 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
985 $kinases -> {$x[2]} = $x[2];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
986 push (@kinases_observed, $x[2]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
987 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
988 my $tmp = $x[10]."_".$x[2]; #eg, REEILsEMKKV_PKCalpha
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
989 if (exists($p_sequence_kinase -> {$tmp})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
990 #do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
991 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
992 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
993 $p_sequence_kinase -> {$tmp} = $tmp;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
994 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
995 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
996 close IN1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
997
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
998 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
999 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1000 # Read the Kinase motifs file:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1001 # 1) make a "motif_sequence" array
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1002 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1003 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1004
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1005 # file format (tab separated):
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1006 # x[0] = primary key (character), e.g., '17' or '23a'
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1007 # x[1] = pattern (egrep pattern), e.g., '(M|I|L|V|F|Y).R..(pS|pT)'
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1008 # x[2] = description, e.g., 'PKA_Phosida' or '14-3-3 domain binding motif (HPRD)' or 'Akt kinase substrate motif (HPRD & Phosida)'
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1009
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1010 my $SITE_MOTIF = 2;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1011 $site_description{$SITE_MOTIF} = "motif";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1012
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1013 open (IN2, "<", $motifs_in) or die "I couldn't open $motifs_in: $!\n"; # 3-arg open: path is never parsed for a mode; $! reports the real cause
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1014 print "Reading the Motifs file: $motifs_in\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1015
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1016 while (<IN2>) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1017 chomp;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1018 my (@x) = split(/\t/);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1019 for my $i (0 .. 2) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1020 $x[$i] =~ s/\r//g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1021 $x[$i] =~ s/\n//g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1022 $x[$i] =~ s/\"//g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1023 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1024 if (exists ($motif_type{$x[1]})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1025 $motif_type{$x[1]} = $motif_type{$x[1]}." & ".$x[2];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1026 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1027 $motif_type{$x[1]} = $x[2];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1028 $motif_count{$x[1]} = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1029 push (@motif_sequence, $x[1]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1030 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1031 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1032 close (IN2);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1033
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1034
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1035 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1036 # 6.28.2011
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1037 # Read PSP_Kinase_Substrate data:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1038 # 1) make a "kinases_PhosphoSite" array
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1039 # 2) annotate the phospho-substrates with the appropriate kinase
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1040 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1041 # Columns:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1042 # (0) GENE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1043 # (1) KINASE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1044 # (2) KIN_ACC_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1045 # (3) KIN_ORGANISM
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1046 # (4) SUBSTRATE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1047 # (5) SUB_GENE_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1048 # (6) SUB_ACC_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1049 # (7) SUB_GENE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1050 # (8) SUB_ORGANISM
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1051 # (9) SUB_MOD_RSD
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1052 # (10) SITE_GRP_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1053 # (11) SITE_+/-7_AA
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1054 # (12) DOMAIN
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1055 # (13) IN_VIVO_RXN
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1056 # (14) IN_VITRO_RXN
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1057 # (15) CST_CAT#
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1058 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1059
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1060 my $SITE_PHOSPHOSITE = 3;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1061 $site_description{$SITE_PHOSPHOSITE} = "PhosphoSite";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1062
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1063
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1064 $line = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1065
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1066 open (IN3, "<", $PSP_Kinase_Substrate_in) or die "I couldn't open $PSP_Kinase_Substrate_in: $!\n"; # 3-arg open: path is never parsed for a mode; $! reports the real cause
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1067 print "Reading the PhosphoSite Kinase-Substrate data: $PSP_Kinase_Substrate_in\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1068
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1069 while (<IN3>) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1070 chomp;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1071 my (@x) = split(/\t/);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1072 for my $i (0 .. $#x) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1073 $x[$i] =~ s/\r//g; $x[$i] =~ s/\n//g; $x[$i] =~ s/\"//g;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1074 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1075 if ($line != 0) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1076 if (($species eq $x[3]) && ($species eq $x[8])) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1077 if (exists ($kinases_PhosphoSite -> {$x[0]})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1078 #do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1079 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1080 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1081 $kinases_PhosphoSite -> {$x[0]} = $x[0];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1082 push (@kinases_PhosphoSite, $x[0]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1083 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1084 my $offset = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1085 # Replace the superfluous lower case s, t and y
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1086 my @lowercase = ('s','t','y');
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1087 my @uppercase = ('S','T','Y');
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1088 for my $k (0 .. 2) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1089 my $site = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1090 while ($site != -1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1091 $site = index($x[11],$lowercase[$k], $offset);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1092 if (($site != 7) && ($site != -1)) {substr($x[11], $site, 1, $uppercase[$k]);}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1093 $offset = $site + 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1094 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1095 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1096 my $tmp = $x[11]."_".$x[0]; #eg, RTPGRPLsSYGMDSR_PAK2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1097 if (exists($p_sequence_kinase_PhosphoSite -> {$tmp})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1098 #do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1099 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1100 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1101 $p_sequence_kinase_PhosphoSite -> {$tmp} = $tmp;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1102 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1103 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1104 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1105 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1106 #print "PSP_kinase_substrate line rejected because KIN_ORGANISM is '$x[3]' and SUB_ORGANISM is '$x[8]': $line\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1107 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1108 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1109 $line++;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1110 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1111 close IN3;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1112
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1113
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1114 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1115 # Read PhosphoSite regulatory site data:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1116 # 1) make a "regulatory_sites_PhosphoSite" hash
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1117 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1118 # Columns:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1119 # (0) GENE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1120 # (1) PROTEIN --> #ACE %psp_regsite_protein
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1121 # (2) PROT_TYPE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1122 # (3) ACC_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1123 # (4) GENE_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1124 # (5) HU_CHR_LOC
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1125 # (6) ORGANISM --> %organism
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1126 # (7) MOD_RSD
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1127 # (8) SITE_GRP_ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1128 # (9) SITE_+/-7_AA --> %regulatory_sites_PhosphoSite_hash
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1129 # (10) DOMAIN --> %domain
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1130 # (11) ON_FUNCTION --> %ON_FUNCTION
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1131 # (12) ON_PROCESS --> %ON_PROCESS
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1132 # (13) ON_PROT_INTERACT --> %ON_PROT_INTERACT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1133 # (14) ON_OTHER_INTERACT --> %ON_OTHER_INTERACT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1134 # (15) PMIDs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1135 # (16) LT_LIT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1136 # (17) MS_LIT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1137 # (18) MS_CST
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1138 # (19) NOTES --> %notes
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1139 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1140
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1141
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Open the output SQLite database for modification and make sure the tables
# needed for the PhosphoSite regulatory-site data exist:
#   PSP_Regulatory_site, ppep_regsite_LUT and Citation.
# AutoCommit is switched off here; its previous value is kept in
# $auto_commit.

# DBI->connect returns undef on failure; stop with the driver's message
# rather than failing on the attribute access below.
$dbh = DBI->connect("dbi:SQLite:$db_out", undef, undef)
    or die "Couldn't connect to SQLite database $db_out: $DBI::errstr\n";
my $auto_commit = $dbh->{AutoCommit};
$dbh->{AutoCommit} = 0;
print "DB connection $dbh is to $db_out, opened for modification\n";

# add partial PSP_Regulatory_site table (if not exists) regardless of whether SwissProt input was FASTA or SQLite
$stmth = $dbh->prepare("
    CREATE TABLE IF NOT EXISTS PSP_Regulatory_site (
      SITE_PLUSMINUS_7AA TEXT PRIMARY KEY ON CONFLICT IGNORE,
      DOMAIN TEXT,
      ON_FUNCTION TEXT,
      ON_PROCESS TEXT,
      ON_PROT_INTERACT TEXT,
      ON_OTHER_INTERACT TEXT,
      NOTES TEXT,
      ORGANISM TEXT,
      PROTEIN TEXT
    )
    ") or die "Couldn't prepare CREATE TABLE PSP_Regulatory_site: " . $dbh->errstr . "\n";
# NOTE(review): the result of execute() is not checked here or below;
# a failure is only reported if PrintError/RaiseError is enabled on $dbh.
$stmth->execute();

# add partial PSP_Regulatory_site LUT (if not exists) regardless of whether SwissProt input was FASTA or SQLite
$stmth = $dbh->prepare("
    CREATE TABLE IF NOT EXISTS ppep_regsite_LUT
    ( ppep_id            INTEGER REFERENCES ppep(id)
    , site_plusminus_7AA TEXT    REFERENCES PSP_Regulatory_site(site_plusminus_7AA)
    , PRIMARY KEY (ppep_id, site_plusminus_7AA) ON CONFLICT IGNORE
    );
    ") or die "Couldn't prepare CREATE TABLE ppep_regsite_LUT: " . $dbh->errstr . "\n";
$stmth->execute();

# $stmth = $dbh->prepare("
#     CREATE UNIQUE INDEX idx_PSP_Regulatory_site_0
#       ON PSP_Regulatory_site(site_plusminus_7AA);
#     ");
# $stmth->execute();


# add Citation table (if not exists) regardless of whether SwissProt input was FASTA or SQLite
my $citation_sql;
$citation_sql = "
    CREATE TABLE IF NOT EXISTS Citation (
      ObjectName TEXT REFERENCES sqlite_schema(name) ON DELETE CASCADE,
      CitationData TEXT,
      PRIMARY KEY (ObjectName, CitationData) ON CONFLICT IGNORE
    )
    ";
$stmth = $dbh->prepare($citation_sql)
    or die "Couldn't prepare CREATE TABLE Citation: " . $dbh->errstr . "\n";
$stmth->execute();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1191
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1192
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Open the PhosphoSite regulatory-sites file for reading.
# Three-argument open: the file name is taken literally, so a name that
# begins with '>' or ends with '|' can neither truncate a file nor run a
# command.  $! reports why the open failed (missing, unreadable, ...).
open (IN4, '<', $PSP_Regulatory_Sites_in) or die "I couldn't find $PSP_Regulatory_Sites_in: $!\n";
print "Reading the PhosphoSite regulatory site data: $PSP_Regulatory_Sites_in\n";


# The read loop that follows increments $line before using it, so
# starting at -1 numbers the first line of the file as 0.
$line = -1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# ---------------------------------------------------------------------------
# Read the PhosphoSitePlus regulatory-sites table from IN4.
#
# Column positions used below (names taken from the table header as echoed
# in this script's diagnostics): 0 GENE, 1 PROTEIN, 6 ORGANISM,
# 8 SITE_GRP_ID, 9 SITE_+/-7_AA, 10 DOMAIN.  Columns 11-14 and 19 feed
# %ON_FUNCTION, %ON_PROCESS, %ON_PROT_INTERACT, %ON_OTHER_INTERACT and
# %notes respectively -- NOTE(review): presumably the like-named columns of
# the PhosphoSitePlus file; confirm against the input format.
#
# For each row whose organism equals $species, the per-site hashes (all
# keyed by the SITE_+/-7_AA string) are filled in; when the same site
# appears on more than one row, the values are merged with " / ".
# ---------------------------------------------------------------------------

# Set once the "GENE" header row has been seen.  This flag must live outside
# the loop: it gates the "has no SITE_+/-7_AA" diagnostic so that preamble
# lines ahead of the header are not reported.  (Declared inside the loop it
# was reset on every row and the diagnostic could never be printed.)
my $found_GENE_header = 0;

# Merge $value into $annotation_for->{$site}.
#   - an undefined or empty $value changes nothing (split() drops trailing
#     empty fields, so short rows yield undefined columns);
#   - an undefined or empty current value is replaced by $value;
#   - otherwise $value is appended after " / " unless the current value
#     already contains it (literal substring test, not a regex, so
#     metacharacters in the data are harmless).
my $merge_site_annotation = sub {
    my ($annotation_for, $site, $value) = @_;
    return if !defined $value || $value eq "";
    my $current = $annotation_for->{$site};
    if (!defined $current || $current eq "") {
        $annotation_for->{$site} = $value;
    }
    elsif (index($current, $value) < 0) {
        $annotation_for->{$site} = $current . " / " . $value;
    }
    return;
};

while (<IN4>) {
    $line++;
    chomp;

    # A line mentioning PhosphoSitePlus is kept as the citation text (tabs
    # and line-end characters removed, CSV-style quoting undone) and echoed.
    if ($_ =~ m/PhosphoSitePlus/) {
        $PhosphoSitePlusCitation = $_;
        $PhosphoSitePlusCitation =~ s/\t//g;
        $PhosphoSitePlusCitation =~ s/\r//g;
        $PhosphoSitePlusCitation =~ s/\n//g;
        $PhosphoSitePlusCitation =~ s/""/"/g;
        $PhosphoSitePlusCitation =~ s/^"//g;
        $PhosphoSitePlusCitation =~ s/"$//g;
        print "$PhosphoSitePlusCitation\n";
        next;
    }

    # Split into tab-separated fields and strip CR, LF and double quotes.
    my (@x) = split(/\t/);
    for my $i (0 .. $#x) {
        $x[$i] =~ s/\r//g;
        $x[$i] =~ s/\n//g;
        $x[$i] =~ s/\"//g;
    }

    # Skip empty lines; note (and skip) the header row.
    if (not exists($x[0])) {
        next;
    }
    elsif ($x[0] eq "GENE") {
        $found_GENE_header = 1;
        next;
    }

    if ( (not exists($x[9])) || ($x[9] eq "") ) {
        # No SITE_+/-7_AA value on this row.
        if (exists($x[8]) && ($x[8] ne "")) {
            # A site-group id without a site sequence is treated as fatal.
            die "$PSP_Regulatory_Sites_in line $line has no SITE_+/-7_AA: $_\n";
        }
        else {
            if ( (not exists($x[1])) || ($x[1] ne "") ) {
                print "$PSP_Regulatory_Sites_in line $line (".length($_)." characters) has no SITE_+/-7_AA: $_\n"
                    if $found_GENE_header == 1;
            }
            next;
        }
    }
    elsif ($line != 0) {
        if ($species ne $x[6]) {
            # Do nothing - this record was filtered out by the species filter
        }
        elsif (!exists($regulatory_sites_PhosphoSite_hash{$x[9]})) {
            # First row seen for this site: record every annotation as-is,
            # unless a domain has already been stored under this key.
            if (!defined $domain{$x[9]} || $domain{$x[9]} eq "") {
                $regulatory_sites_PhosphoSite_hash{$x[9]} = $x[9];
                $domain{$x[9]}            = $x[10];
                $ON_FUNCTION{$x[9]}       = $x[11];
                $ON_PROCESS{$x[9]}        = $x[12];
                $ON_PROT_INTERACT{$x[9]}  = $x[13];
                $ON_OTHER_INTERACT{$x[9]} = $x[14];
                $notes{$x[9]}             = $x[19];
                $organism{$x[9]}          = $x[6];
            }
        }
        else {
            # Site already recorded: merge each annotation from ITS OWN
            # column.  (Previously every field was merged from the DOMAIN
            # column, $x[10], and seeded from a hash lookup keyed by the
            # DOMAIN text rather than by the site.)
            $merge_site_annotation->(\%domain,            $x[9], $x[10]);
            $merge_site_annotation->(\%ON_FUNCTION,       $x[9], $x[11]);
            $merge_site_annotation->(\%ON_PROCESS,        $x[9], $x[12]);
            $merge_site_annotation->(\%ON_PROT_INTERACT,  $x[9], $x[13]);
            $merge_site_annotation->(\%ON_OTHER_INTERACT, $x[9], $x[14]);
            $merge_site_annotation->(\%notes,             $x[9], $x[19]);
            $merge_site_annotation->(\%organism,          $x[9], $x[6]);
        }
    }
}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
# Done with the regulatory-sites input.
close IN4;

print "... Finished reading various site data at " . format_localtime_iso8601() ."\n\n";

# Prepare the statement that add_citation() executes: one row per citation,
# with placeholders for (ObjectName, CitationData).  Fail here, with the
# driver's message, rather than later with "Can't call method bind_param on
# an undefined value" if the statement cannot be prepared.
$stmth = $dbh->prepare("
    INSERT INTO Citation (
        ObjectName,
        CitationData
    ) VALUES (?,?)
")
    or die "Could not prepare INSERT INTO Citation: " . $dbh->errstr . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1344
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1345 sub add_citation { # add_citation($cit_table, $cit_text, $cit_label): run the statement held in $stmth once (prepared above as INSERT INTO Citation (ObjectName, CitationData)); no explicit return value
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1346 my ($cit_table, $cit_text, $cit_label) = @_; # $cit_label is not stored; it only tags the error message below
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1347 $stmth->bind_param(1, $cit_table);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1348 $stmth->bind_param(2, $cit_text);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1349 if (not $stmth->execute()) { # a failed insert is reported on STDOUT and the script carries on
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1350 print "Error writing $cit_label cit for table $cit_table: " . $stmth->errstr . "\n"; # method calls are not interpolated inside double quotes, so errstr is concatenated explicitly
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1351 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1352 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1353 my ($citation_text, $citation_table); # scratch pair reused for every add_citation() call below; each call inserts one (table name, citation text) row
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1354
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1355 # PSP regulatory or kinase/substrate site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1356 $citation_text = 'PhosphoSitePlus(R) (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words "PhosphoSitePlus(R), www.phosphosite.org" must be included at appropriate places in the text or webpage, and (b) the following citation must be included in the bibliography: "Hornbeck PV, Zhang B, Murray B, Kornhauser JM, Latham V, Skrzypek E PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. Nucleic Acids Res. 2015 43:D512-20. PMID: 25514926."';
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1357 $citation_table = "PSP_Regulatory_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1358 add_citation($citation_table, $citation_text, "PSP_Kinase_Substrate"); # NOTE(review): label differs from the table name (PSP_Regulatory_site); the label is only used in add_citation's error message - confirm intent
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1359 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1360 add_citation($citation_table, $citation_text, "PSP_Kinase_Substrate");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1361 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1362 add_citation($citation_table, $citation_text, "PSP_Regulatory_site"); # NOTE(review): the same view is labelled PSP_Kinase_Substrate further down - confirm which label is intended
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1363 $citation_text = 'Hornbeck, 2014, "PhosphoSitePlus, 2014: mutations, PTMs and recalibrations.", https://pubmed.ncbi.nlm.nih.gov/25514926, https://doi.org/10.1093/nar/gku1267'; # PubMed ID and DOI now match this title; the PMID agrees with the PSP licence text above
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1364 $citation_table = "PSP_Regulatory_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1365 add_citation($citation_table, $citation_text, "PSP_Regulatory_site");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1366 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1367 add_citation($citation_table, $citation_text, "PSP_Kinase_Substrate");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1368 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1369 add_citation($citation_table, $citation_text, "PSP_Kinase_Substrate");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1370
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1371 # NetworKIN site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1372 $citation_text = 'Linding, 2007, "Systematic discovery of in vivo phosphorylation networks.", https://pubmed.ncbi.nlm.nih.gov/17570479, https://doi.org/10.1016/j.cell.2007.05.052';
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1373 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1374 add_citation($citation_table, $citation_text, "NetworkKIN");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1375 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1376 add_citation($citation_table, $citation_text, "NetworkKIN");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1377 $citation_text = 'Horn, 2014, "KinomeXplorer: an integrated platform for kinome biology studies.", https://pubmed.ncbi.nlm.nih.gov/24874572, https://doi.org/10.1038/nmeth.2968'; # DOI was truncated to nmeth.296
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1378 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1379 add_citation($citation_table, $citation_text, "NetworkKIN");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1380 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1381 add_citation($citation_table, $citation_text, "NetworkKIN");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1382 $citation_text = 'Aken, 2016, "The Ensembl gene annotation system.", https://pubmed.ncbi.nlm.nih.gov/27337980, https://doi.org/10.1093/database/baw093'; # PubMed ID now matches the cited title and DOI
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1383 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1384 add_citation($citation_table, $citation_text, "NetworkKIN");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1385 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1386 add_citation($citation_table, $citation_text, "NetworkKIN");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1387
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1388 # pSTY motifs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1389 $citation_text = 'Amanchy, 2007, "A curated compendium of phosphorylation motifs.", https://pubmed.ncbi.nlm.nih.gov/17344875, https://doi.org/10.1038/nbt0307-285';
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1390 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1391 add_citation($citation_table, $citation_text, "Amanchy_pSTY_motifs");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1392 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1393 add_citation($citation_table, $citation_text, "Amanchy_pSTY_motifs");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1394 $citation_text = 'Gnad, 2011, "PHOSIDA 2011: the posttranslational modification database.", https://pubmed.ncbi.nlm.nih.gov/21081558, https://doi.org/10.1093/nar/gkq1159';
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1395 $citation_table = "psp_gene_site";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1396 add_citation($citation_table, $citation_text, "Phosida_pSTY_motifs");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1397 $citation_table = "psp_gene_site_view";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1398 add_citation($citation_table, $citation_text, "Phosida_pSTY_motifs");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1399
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1400
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1401 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1402 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1403 # Read the data file:
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1404 # 1) find sequences that match the NetworKIN predictions
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1405 # 2) find motifs that match the observed sequences
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1406 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1407 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1408
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1409 print "--- Find sequences that match the NetworKIN predictions and find motifs that match observed sequences\n"; # progress banner for the matching phase that follows
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1410
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1411 my $ppep_regsite_LUT_stmth; # statement handle for inserts into ppep_regsite_LUT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1412 $ppep_regsite_LUT_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1413 INSERT INTO ppep_regsite_LUT (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1414 ppep_id,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1415 site_plusminus_7AA
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1416 ) VALUES (?,?)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1417 "); # placeholder 1 = ppep_id, placeholder 2 = site_plusminus_7AA (the prepare result is not checked here)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1418
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1419 my ($start_seconds, $start_microseconds) = gettimeofday; # start time captured just before the per-peptide loop; presumably Time::HiRes::gettimeofday - confirm against the file's imports
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1420
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1421 foreach my $peptide (keys %data) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1422 # find the unique phospho-motifs for this $peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1423 my @all_motifs = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1424 my $have_all_motifs = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1425 for my $i (0 .. $#{ $matched_sequences{$peptide} } ) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1426 my $tmp_motif = $p_motifs{$peptide}[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1427 push(@all_motifs, $tmp_motif);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1428 $have_all_motifs = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1429 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1430 if ($have_all_motifs == 1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1431 for my $j (0 .. $#all_motifs) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1432 if (defined $all_motifs[$j]) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1433 $all_motifs[$j] =~ s/\d+-\[\s//;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1434 $all_motifs[$j] =~ s/\s\]\-\d+//;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1435 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1436 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1437 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1438 my %seen = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1439 if ($have_all_motifs == 1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1440 foreach my $a (@all_motifs) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1441 if (defined $a) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1442 if (exists($seen{$a})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1443 next;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1444 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1445 push(@{$unique_motifs{$peptide}}, $a);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1446 $seen{$a} = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1447 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1448 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1449 print "push(\@{\$unique_motifs{$peptide}}, $a);\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1450 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1451 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1452
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1453 # count the number of phospho-sites in the motif
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1454 my $number_pY = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1455 my $number_pSTY = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1456 if ($phospho_type eq 'y') {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1457 if (defined(${$unique_motifs{$peptide}}[0])) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1458 while (${$unique_motifs{$peptide}}[0] =~ /pY/g) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1459 $number_pY++;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1460 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1461 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1462 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1463 if ($phospho_type eq 'sty') {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1464 print "looking for unique_motifs for $peptide\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1465 if (defined(${$unique_motifs{$peptide}}[0])) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1466 while (${$unique_motifs{$peptide}}[0] =~ /(pS|pT|pY)/g) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1467 $number_pSTY++;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1468 print "We have found $number_pSTY unique_motifs for $peptide\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1469 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1470 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1471 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1472
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1473
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1474 # search each of the unique motifs for matches
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1475 print "searching " . ($#{$unique_motifs{$peptide}} + 1) . " motifs for peptide $peptide\n" if ($verbose); # $#{...} is the last index, so add 1 to report how many motifs the loop below visits
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1476 for my $i (0 .. $#{$unique_motifs{$peptide}}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1477 print "\$i = $i; peptide = $peptide; unique_motif = ${$unique_motifs{$peptide}}[$i]\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1478 my $tmp_motif = ${$unique_motifs{$peptide}}[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1479 print " --- matching unique motif $tmp_motif for peptide $peptide at " . format_localtime_iso8601() ."\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1480 my $formatted_sequence;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1481 if (($number_pY == 1) || ($number_pSTY == 1)) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1482 my $seq_plus5aa = "";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1483 my $seq_plus7aa = "";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1484 $formatted_sequence = &replace_pSpTpY($tmp_motif, $phospho_type);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1485 print " a #pY $number_pY; #pSTY $number_pSTY; matching formatted motif $formatted_sequence for peptide $peptide at " . format_localtime_iso8601() ."\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1486 if ($phospho_type eq 'y') {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1487 $seq_plus5aa = (split(/(\w{0,5}y\w{0,5})/, $formatted_sequence))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1488 $seq_plus7aa = (split(/(\w{0,7}y\w{0,7})/, $formatted_sequence))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1489 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1490 elsif ($phospho_type eq "sty") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1491 $seq_plus5aa = (split(/(\w{0,5}(s|t|y)\w{0,5})/, $formatted_sequence))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1492 $seq_plus7aa = (split(/(\w{0,7}(s|t|y)\w{0,7})/, $formatted_sequence))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1493 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1494
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1495 if (defined $seq_plus7aa) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1496 # commit the 7aa LUT records
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1497 $ppep_regsite_LUT_stmth->bind_param( 1, $ppep_id_lut{$peptide} );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1498 $ppep_regsite_LUT_stmth->bind_param( 2, $seq_plus7aa );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1499 if (not $ppep_regsite_LUT_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1500 print "Error writing tuple ($ppep_id_lut{$peptide},$seq_plus7aa) for peptide $peptide to ppep_regsite_LUT: " . $ppep_regsite_LUT_stmth->errstr . "\n"; # method calls are not interpolated inside double quotes, so errstr is concatenated explicitly
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1501 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1502 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1503 for my $i (0 .. $#kinases_observed) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1504 if (defined $seq_plus5aa) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1505 my $tmp = $seq_plus5aa."_".$kinases_observed[$i]; #eg, should be PGRPLsSYGMD_PKCalpha
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1506 if (exists($p_sequence_kinase -> {$tmp})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1507 $kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]} = "X"; #ACE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1508 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1509 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1510 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1511 for my $i (0 .. $#motif_sequence) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1512 if ($peptide =~ /$motif_sequence[$i]/) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1513 $kinase_motif_matches{$peptide}{$motif_sequence[$i]} = "X";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1514 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1515 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1516 for my $i (0 .. $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1517 if (defined $seq_plus7aa) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1518 my $tmp = $seq_plus7aa."_".$kinases_PhosphoSite[$i]; #eg, should be RTPGRPLsSYGMDSR_PAK2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1519 if (exists($p_sequence_kinase_PhosphoSite -> {$tmp})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1520 $kinase_substrate_PhosphoSite_matches{$peptide}{$kinases_PhosphoSite[$i]} = "X";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1521 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1522 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1523 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1524 if (defined($seq_plus7aa) && exists($regulatory_sites_PhosphoSite_hash{$seq_plus7aa})) { # split(...)[1] yields undef when no window matched; guard as the other uses of $seq_plus7aa in this branch do
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1525 $seq_plus7aa_2{$peptide} = $seq_plus7aa;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1526 $domain_2{$peptide} = $domain{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1527 $ON_FUNCTION_2{$peptide} = $ON_FUNCTION{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1528 $ON_PROCESS_2{$peptide} = $ON_PROCESS{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1529 $ON_PROT_INTERACT_2{$peptide} = $ON_PROT_INTERACT{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1530 $ON_OTHER_INTERACT_2{$peptide} = $ON_OTHER_INTERACT{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1531 $notes_2{$peptide} = $notes{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1532 $organism_2{$peptide} = $organism{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1533 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1534 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1535 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1536 elsif (($number_pY > 1) || ($number_pSTY > 1)) { #eg, if $x[4] is 1308-[ VIYFQAIEEVpYpYDHLRSAAKKR ]-1329 and $number_pY == 2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1537 $formatted_sequence = $tmp_motif;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1538 $seq_plus5aa = "";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1539 $seq_plus7aa = "";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1540 #Create the sequences with only one phosphorylation site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1541 #eg, 1308-[ VIYFQAIEEVpYpYDHLRSAAKKR ]-1329, which becomes 1308-[ VIYFQAIEEVpYYDHLRSAAKKR ]-1329 and 1308-[ VIYFQAIEEVYpYDHLRSAAKKR ]-1329
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1542
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1543 my (@sites, $offset, $next_p_site);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1544 $sites[0] = index($tmp_motif, "p");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1545 $offset = $sites[0] + 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1546 $next_p_site = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1547 while ($next_p_site != -1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1548 $next_p_site = index($tmp_motif, "p", $offset);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1549 if ($next_p_site != -1) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1550 push (@sites, $next_p_site);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1551 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1552 $offset = $next_p_site+1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1553 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1554
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1555 my @pSTY_sequences;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1556 for my $n (0 .. $#sites) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1557 $pSTY_sequences[$n] = $tmp_motif;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1558 for (my $m = $#sites; $m >= 0; $m--) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1559 if ($m != $n) {substr($pSTY_sequences[$n], $sites[$m], 1) = "";}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1560 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1561 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1562
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1563 my @formatted_sequences;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1564 for my $k (0 .. $#sites) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1565 $formatted_sequences[$k] = &replace_pSpTpY($pSTY_sequences[$k], $phospho_type);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1566 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1567
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1568 for my $k (0 .. $#formatted_sequences) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1569 print " b #pY $number_pY; #pSTY $number_pSTY; matching formatted motif $formatted_sequences[$k] for peptide $peptide at " . format_localtime_iso8601() ."\n" if ($verbose);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1570 if ($phospho_type eq 'y') {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1571 $seq_plus5aa = (split(/(\w{0,5}y\w{0,5})/, $formatted_sequences[$k]))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1572 $seq_plus7aa = (split(/(\w{0,7}y\w{0,7})/, $formatted_sequences[$k]))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1573 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1574 elsif ($phospho_type eq "sty") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1575 $seq_plus5aa = (split(/(\w{0,5}(s|t|y)\w{0,5})/, $formatted_sequences[$k]))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1576 $seq_plus7aa = (split(/(\w{0,7}(s|t|y)\w{0,7})/, $formatted_sequences[$k]))[1];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1577 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1578 for my $i (0 .. $#kinases_observed) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1579 my $tmp = $seq_plus5aa."_".$kinases_observed[$i]; #eg, should look like REEILsEMKKV_PKCalpha
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1580 if (exists($p_sequence_kinase -> {$tmp})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1581 $kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]} = "X";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1582 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1583 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1584 $pSTY_sequence = $formatted_sequences[$k];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1585 for my $i (0 .. $#motif_sequence) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1586 if ($pSTY_sequence =~ /$motif_sequence[$i]/) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1587 $kinase_motif_matches{$peptide}{$motif_sequence[$i]} = "X";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1588 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1589 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1590 for my $i (0 .. $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1591 my $tmp = $seq_plus7aa."_".$kinases_PhosphoSite[$i]; #eg, should be RTPGRPLsSYGMDSR_PAK2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1592 #print "seq_plus7aa._.kinases_PhosphoSite[i] is $tmp";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1593 if (exists($p_sequence_kinase_PhosphoSite -> {$tmp})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1594 $kinase_substrate_PhosphoSite_matches{$peptide}{$kinases_PhosphoSite[$i]} = "X";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1595 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1596 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1597 if (exists($regulatory_sites_PhosphoSite -> {$seq_plus7aa})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1598 $seq_plus7aa_2{$peptide} = $seq_plus7aa;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1599
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1600 # $domain
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1601 if ($domain_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1602 $domain_2{$peptide} = $domain{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1603 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1604 elsif ($domain{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1605 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1606 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1607 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1608 $domain_2{$peptide} = $domain_2{$peptide}." / ".$domain{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1609 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1610
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1611
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1612 # $ON_FUNCTION_2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1613 if ($ON_FUNCTION_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1614 $ON_FUNCTION_2{$peptide} = $ON_FUNCTION{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1615 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1616 elsif ($ON_FUNCTION{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1617 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1618 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1619 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1620 $ON_FUNCTION_2{$peptide} = $ON_FUNCTION_2{$peptide}." / ".$ON_FUNCTION{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1621 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1622
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1623 # $ON_PROCESS_2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1624 if ($ON_PROCESS_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1625 $ON_PROCESS_2{$peptide} = $ON_PROCESS{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1626 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1627 elsif ($ON_PROCESS{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1628 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1629 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1630 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1631 $ON_PROCESS_2{$peptide} = $ON_PROCESS_2{$peptide}." / ".$ON_PROCESS{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1632 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1633
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1634 # $ON_PROT_INTERACT_2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1635 if ($ON_PROT_INTERACT_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1636 $ON_PROT_INTERACT_2{$peptide} = $ON_PROT_INTERACT{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1637 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1638 elsif ($ON_PROT_INTERACT{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1639 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1640 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1641 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1642 $ON_PROT_INTERACT_2{$peptide} = $ON_PROT_INTERACT_2{$peptide}." / ".$ON_PROT_INTERACT{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1643 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1644
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1645 # $ON_OTHER_INTERACT_2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1646 if ($ON_OTHER_INTERACT_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1647 $ON_OTHER_INTERACT_2{$peptide} = $ON_OTHER_INTERACT{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1648 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1649 elsif ($ON_OTHER_INTERACT{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1650 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1651 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1652 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1653 $ON_OTHER_INTERACT_2{$peptide} = $ON_OTHER_INTERACT_2{$peptide}." / ".$ON_OTHER_INTERACT{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1654 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1655
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1656 # $notes_2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1657 if ($notes_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1658 $notes_2{$peptide} = $notes{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1659 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1660 elsif ($notes{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1661 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1662 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1663 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1664 $notes_2{$peptide} = $notes_2{$peptide}." / ".$notes{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1665 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1666 # Intentionally no further assignment here: the if/elsif/else above already stored or " / "-appended $notes{$seq_plus7aa}; an unconditional overwrite at this point discarded notes gathered from earlier matching sites.
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1667
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1668 # $organism_2
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1669 if ($organism_2{$peptide} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1670 $organism_2{$peptide} = $organism{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1671 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1672 elsif ($organism{$seq_plus7aa} eq "") {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1673 # do nothing
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1674 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1675 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1676 $organism_2{$peptide} = $organism_2{$peptide}." / ".$organism{$seq_plus7aa};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1677 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1678 $organism_2{$peptide} = $organism{$seq_plus7aa}; # NOTE(review): runs unconditionally after the if/elsif/else above, so the " / "-joined value built there is replaced by the latest site's organism (possibly empty) - confirm this is intended
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1679 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1680 } # if (exists($regulatory_sites_PhosphoSite -> {$seq_plus7aa}))
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1681 } # for my $k (0 .. $#formatted_sequences)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1682 } # if/else number of phosphosites
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1683 } # for each motif i # for my $i (0 .. $#{$unique_motifs{$peptide}})
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1684 } # for each $peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1685
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1686 my ($end_seconds, $end_microseconds) = gettimeofday;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1687
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1688 my $delta_seconds = $end_seconds - $start_seconds;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1689 my $delta_microseconds = $end_microseconds - $start_microseconds;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1690 $delta_microseconds += 1000000 * $delta_seconds;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1691 my $key_count = keys(%data);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1692 print sprintf("Average search time is %d microseconds per phopshopeptide\n", ($key_count ? $delta_microseconds / $key_count : 0)); # guard: with an empty %data the division would die with "Illegal division by zero"; report 0 instead
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1693
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1694 ($start_seconds, $start_microseconds) = gettimeofday;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1695
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1696 print "Writing PSP_Regulatory_site records\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1697
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1698 my $psp_regulatory_site_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1699 INSERT INTO PSP_Regulatory_site (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1700 DOMAIN,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1701 ON_FUNCTION,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1702 ON_PROCESS,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1703 ON_PROT_INTERACT,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1704 ON_OTHER_INTERACT,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1705 NOTES,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1706 SITE_PLUSMINUS_7AA,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1707 ORGANISM
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1708 ) VALUES (?,?,?,?,?,?,?,?)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1709 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1710
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1711 foreach my $peptide (keys %data) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1712 if (exists($domain_2{$peptide}) and (defined $domain_2{$peptide}) and (not $domain_2{$peptide} eq "") ) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1713 $psp_regulatory_site_stmth->bind_param(1, $domain_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1714 $psp_regulatory_site_stmth->bind_param(2, $ON_FUNCTION_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1715 $psp_regulatory_site_stmth->bind_param(3, $ON_PROCESS_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1716 $psp_regulatory_site_stmth->bind_param(4, $ON_PROT_INTERACT_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1717 $psp_regulatory_site_stmth->bind_param(5, $ON_OTHER_INTERACT_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1718 $psp_regulatory_site_stmth->bind_param(6, $notes_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1719 $psp_regulatory_site_stmth->bind_param(7, $seq_plus7aa_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1720 $psp_regulatory_site_stmth->bind_param(8, $organism_2{$peptide});
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1721 if (not $psp_regulatory_site_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1722 print "Error writing PSP_Regulatory_site for one regulatory site with peptide '$domain_2{$peptide}': " . $psp_regulatory_site_stmth->errstr . "\n"; # method calls are not interpolated inside double quotes, so errstr is concatenated to report the actual DBI error text
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1723 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1724 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1725 } elsif (exists($domain_2{$peptide}) and (not defined $domain_2{$peptide})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1726 print "\$domain_2{$peptide} is undefined\n"; #ACE
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1727 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1728 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1729
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1730 $dbh->{AutoCommit} = $auto_commit;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1731 # auto_commit implicitly finishes psp_regulatory_site_stmth, apparently # $psp_regulatory_site_stmth->finish;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1732 $dbh->disconnect if ( defined $dbh ); # NOTE(review): $dbh was already dereferenced on line 1730, so it is always defined by the time this check runs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1733
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1734
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1735 ($end_seconds, $end_microseconds) = gettimeofday;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1736
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1737 $delta_seconds = $end_seconds - $start_seconds;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1738 $delta_microseconds = $end_microseconds - $start_microseconds;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1739 $delta_microseconds += 1000000 * $delta_seconds;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1740 $key_count = keys(%data);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1741 print sprintf("Write time is %d microseconds\n", ($delta_microseconds));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1742
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1743 print "... Finished find sequences that match the NetworKIN predictions and find motifs that match observed sequences at " . format_localtime_iso8601() ."\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1744
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1745 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1746 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1747 # Print to the output file
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1748 #
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1749 ###############################################################################################################################
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1750 open (OUT, '>', $file_out) || die "could not open the fileout: $file_out: $!"; # three-argument open keeps characters in the file name from being parsed as part of the mode; $! reports why the open failed
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1751 open (MELT, '>', $file_melt) || die "could not open the fileout: $file_melt: $!"; # three-argument open keeps characters in the file name from being parsed as part of the mode; $! reports why the open failed
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1752
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1753 # print the header info
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1754 print MELT "phospho_peptide\tgene_names\tsite_type\tkinase_map\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1755 print OUT "p-peptide\tProtein description\tGene name(s)\tFASTA name\tPhospho-sites\tUnique phospho-motifs, no residue numbers\tAccessions\tPhospho-motifs for all members of protein group with residue numbers\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1756
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1757 # print the PhosphoSite regulatory data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1758 print OUT "Domain\tON_FUNCTION\tON_PROCESS\tON_PROT_INTERACT\tON_OTHER_INTERACT\tPhosphoSite notes\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1759
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1760 # print the sample names
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1761 for my $i (0 .. $#samples) { print OUT "$samples[$i]\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1762
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1763 # print the kinases and groups
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1764 for my $i (0 .. $#kinases_observed) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1765 my $temp = $kinases_observed[$i]."_NetworKIN";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1766 print OUT "$temp\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1767 push(@kinases_observed_lbl, $temp);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1768 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1769 for my $i (0 .. $#motif_sequence) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1770 print OUT "$motif_type{$motif_sequence[$i]} ($motif_sequence[$i])\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1771 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1772 for my $i (0 .. $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1773 my $temp = $kinases_PhosphoSite[$i]."_PhosphoSite";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1774 if ($i < $#kinases_PhosphoSite) { print OUT "$temp\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1775 if ($i == $#kinases_PhosphoSite) { print OUT "$temp\n"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1776 push(@phosphosites_observed_lbl, $temp);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1777 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1778
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1779 # begin DDL-to-SQLite
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1780 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1781 $dbh = DBI->connect("dbi:SQLite:$db_out", undef, undef) or die "could not connect to SQLite database $db_out: $DBI::errstr"; # connect returns undef on failure; stop here with the DBI error instead of failing on the next dereference of $dbh
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1782 $auto_commit = $dbh->{AutoCommit};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1783 $dbh->{AutoCommit} = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1784 print "DB connection $dbh is to $db_out, opened for modification\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1785
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1786 my $sample_stmth;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1787 $sample_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1788 INSERT INTO sample (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1789 id,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1790 name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1791 ) VALUES (?,?)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1792 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1793
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1794 my $ppep_intensity_stmth;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1795 $ppep_intensity_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1796 INSERT INTO ppep_intensity (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1797 ppep_id,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1798 sample_id,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1799 intensity
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1800 ) VALUES (?,?,?)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1801 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1802
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1803 my $site_type_stmth;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1804 $site_type_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1805 insert into site_type (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1806 id,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1807 type_name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1808 ) values (?,?)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1809 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1810
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1811 my $ppep_gene_site_stmth;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1812 $ppep_gene_site_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1813 insert into ppep_gene_site (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1814 ppep_id,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1815 gene_names,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1816 kinase_map,
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1817 site_type_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1818 ) values (?,?,?,?)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1819 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1820
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1821 my $ppep_metadata_stmth;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1822 $ppep_metadata_stmth = $dbh->prepare("
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1823 INSERT INTO ppep_metadata
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1824 ( ppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1825 , protein_description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1826 , gene_name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1827 , FASTA_name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1828 , phospho_sites
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1829 , motifs_unique
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1830 , accessions
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1831 , motifs_all_members
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1832 , domain
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1833 , ON_FUNCTION
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1834 , ON_PROCESS
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1835 , ON_PROT_INTERACT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1836 , ON_OTHER_INTERACT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1837 , notes
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1838 ) VALUES (
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1839 ?,?,?,?,?,?,?
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1840 , ?,?,?,?,?,?,?
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1841 )
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1842 ");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1843 # end DDL-to-SQLite
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1844 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1845
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1846 # begin store-to-SQLite "sample" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1847 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1848 # %sample_id_lut maps name -> ID
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1849 for my $sample_name (keys %sample_id_lut) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1850 $sample_stmth->bind_param( 2, $sample_name );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1851 $sample_stmth->bind_param( 1, $sample_id_lut{$sample_name} );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1852 if (not $sample_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1853 print "Error writing tuple ($sample_name,$sample_id_lut{$sample_name}): " . $sample_stmth->errstr . "\n"; # errstr is a method call, which Perl does not interpolate inside a double-quoted string, so it is concatenated instead
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1854 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1855 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1856 # end store-to-SQLite "sample" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1857 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1858
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1859 # begin store-to-SQLite "site_type" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1860 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1861 sub add_site_type {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1862 my ($site_type_id, $site_type_type_name) = @_;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1863 $site_type_stmth->bind_param( 2, $site_type_type_name );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1864 $site_type_stmth->bind_param( 1, $site_type_id );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1865 if (not $site_type_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1866 die "Error writing tuple ($site_type_id,$site_type_type_name): " . $site_type_stmth->errstr . "\n"; # errstr is a method call, which Perl does not interpolate inside a double-quoted string, so it is concatenated instead
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1867 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1868 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1869 add_site_type($SITE_KINASE_SUBSTRATE, $site_description{$SITE_KINASE_SUBSTRATE}); # store the kinase-substrate site type id with its description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1870 add_site_type($SITE_MOTIF, $site_description{$SITE_MOTIF}); # store the motif site type id with its description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1871 add_site_type($SITE_PHOSPHOSITE, $site_description{$SITE_PHOSPHOSITE}); # store the phosphosite site type id with its description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1872 # end store-to-SQLite "site_type" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1873 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1874
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1875 foreach my $peptide (sort(keys %data)) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1876 next if (grep { $_ eq $peptide } @failed_matches); # skip only peptides listed in @failed_matches; the former grep($peptide, ...) tested $peptide for truth per element and so skipped every peptide whenever the list was non-empty (assumes the list holds peptide strings - TODO confirm)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1877 my $ppep_id = $ppep_id_lut{$peptide};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1878 my @ppep_metadata = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1879 my @ppep_intensity = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1880 my @gene = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1881 my $gene_names;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1882 my $j;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1883 # Print the peptide itself
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1884 # column 1: p-peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1885 print OUT "$peptide\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1886 push (@ppep_metadata, $ppep_id);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1887 push (@ppep_intensity, $peptide);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1888
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1889 my $verbose_cond = 0; # $peptide eq 'AAAAAAAGDpSDpSWDADAFSVEDPVR' || $peptide eq 'KKGGpSpSDEGPEPEAEEpSDLDSGSVHSASGRPDGPVR';
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1890 # skip over failed matches
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1891 print "\nfirst match for '$peptide' is '$matched_sequences{$peptide}[0]' and FAILED_MATCH_SEQ is '$FAILED_MATCH_SEQ'\n" if $verbose_cond;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1892 if ($matched_sequences{$peptide}[0] eq $FAILED_MATCH_SEQ) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1893 # column 2: Protein description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1894 # column 3: Gene name(s)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1895 # column 4: FASTA name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1896 # column 5: phospho-residues
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1897 # Column 6: UNIQUE phospho-motifs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1898 # Column 7: accessions
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1899 # Column 8: ALL motifs with residue numbers
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1900 # 2 3 4 5 6 7 8
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1901 print OUT "Sequence not found in FASTA database\tNA\tNA\tNA\tNA\tNA\tNA\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1902 print "No match found for '$peptide' in sequence database\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1903 $gene_names = $FAILED_MATCH_GENE_NAME; # was single-quoted, which stored the literal text '$FAILED_MATCH_GENE_NAME'; NOTE(review): assumes $FAILED_MATCH_GENE_NAME is declared earlier in the file alongside $FAILED_MATCH_SEQ - confirm
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1904 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1905 my @description = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1906 my %seen = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1907 # Print just the protein description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1908 for $i (0 .. $#{$names{$peptide}}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1909 my $long_name = $names{$peptide}[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1910 my @naming_parts = split(/\sOS/, $long_name);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1911 my @front_half = split(/\s/, $naming_parts[0]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1912 push(@description, join(" ", @front_half[1..($#front_half)]));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1913 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1914 # column 2: Protein description
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1915 print OUT join(" /// ", @description), "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1916 push (@ppep_metadata, join(" /// ", @description));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1917
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1918 # Print just the gene name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1919 for $i (0 .. $#{$names{$peptide}}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1920 my $tmp_gene = $names{$peptide}[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1921 $tmp_gene =~ s/^.*GN=//;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1922 $tmp_gene =~ s/\s.*//;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1923 if (!exists($seen{$tmp_gene})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1924 push(@gene, $tmp_gene);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1925 $seen{$tmp_gene} = $tmp_gene;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1926 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1927 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1928 # column 3: Gene name(s)
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1929 $gene_names = join(" /// ", @gene);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1930 print OUT $gene_names, "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1931 push (@ppep_metadata, join(" /// ", @gene));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1932
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1933 # column 4: FASTA name
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1934 print OUT join(" /// ", @{$names{$peptide}}), "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1935 push (@ppep_metadata, join(" /// ", @{$names{$peptide}}));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1936
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1937 # column 5: phospho-residues
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1938 my $tmp_for_insert = "";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1939 my $foobar;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1940 for my $i (0 .. $#{ $matched_sequences{$peptide} } ) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1941 print "match $i for '$peptide' is '$matched_sequences{$peptide}[$i]'\n" if $verbose_cond;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1942 if ($i < $#{ $matched_sequences{$peptide} }) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1943 if (defined $p_residues{$peptide}{$i}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1944 @tmp_p_residues = @{$p_residues{$peptide}{$i}};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1945 for $j (0 .. $#tmp_p_residues) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1946 if ($j < $#tmp_p_residues) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1947 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1948 print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1949 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1950 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1951 elsif ($j == $#tmp_p_residues) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1952 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1953 print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing /// ";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1954 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing /// ";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1955 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1956 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1957 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1958 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1959 elsif ($i == $#{ $matched_sequences{$peptide} }) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1960 my $there_were_sites = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1961 if (defined $p_residues{$peptide}{$i}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1962 @tmp_p_residues = @{$p_residues{$peptide}{$i}};
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1963 if ($#tmp_p_residues > 0) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1964 for my $j (0 .. $#tmp_p_residues) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1965 if ($j < $#tmp_p_residues) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1966 if (defined $p_residues{$peptide}{$i}[$j]) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1967 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1968 $foobar = $residues{$peptide}{$i}[$j];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1969 if (defined $foobar) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1970 print OUT "$foobar";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1971 print OUT "$tmp_site_for_printing, ";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1972 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing, ";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1973 $there_were_sites = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1974 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1975 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1976 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1977 elsif ($j == $#tmp_p_residues) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1978 if (defined $p_residues{$peptide}{$i}[$j]) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1979 $foobar = $residues{$peptide}{$i}[$j];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1980 if (defined $foobar) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1981 my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1982 print OUT "$foobar";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1983 print OUT "$tmp_site_for_printing\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1984 $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1985 $there_were_sites = 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1986 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1987 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1988 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1989 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1990 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1991 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1992 if (0 == $there_were_sites) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1993 print OUT "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1994 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1995 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1996 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1997 print "tmp_for_insert '$tmp_for_insert' for '$peptide'\n" if $verbose_cond;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1998 push (@ppep_metadata, $tmp_for_insert);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
1999
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2000 # Column 6: UNIQUE phospho-motifs
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2001 print OUT join(" /// ", @{$unique_motifs{$peptide}}), "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2002 push (@ppep_metadata, join(" /// ", @{$unique_motifs{$peptide}}));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2003
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2004 # Column 7: accessions
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2005 if (defined $accessions{$peptide}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2006 print OUT join(" /// ", @{$accessions{$peptide}}), "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2007 push (@ppep_metadata, join(" /// ", @{$accessions{$peptide}}));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2008 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2009 print OUT "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2010 push (@ppep_metadata, "");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2011 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2012
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2013 # Column 8: ALL motifs with residue numbers
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2014 if (defined $p_motifs{$peptide}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2015 print OUT join(" /// ", @{$p_motifs{$peptide}}), "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2016 push (@ppep_metadata, join(" /// ", @{$p_motifs{$peptide}}));
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2017 } else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2018 print OUT "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2019 push (@ppep_metadata, "");
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2020 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2021
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2022 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2023
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2024 # Print the PhosphoSite regulatory data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2025
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2026 if (defined $domain_2{$peptide}) { print OUT "$domain_2{$peptide}\t"; } else { print OUT "\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2027 if (defined $ON_FUNCTION_2{$peptide}) { print OUT "$ON_FUNCTION_2{$peptide}\t"; } else { print OUT "\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2028 if (defined $ON_PROCESS_2{$peptide}) { print OUT "$ON_PROCESS_2{$peptide}\t"; } else { print OUT "\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2029 if (defined $ON_PROT_INTERACT_2{$peptide}) { print OUT "$ON_PROT_INTERACT_2{$peptide}\t"; } else { print OUT "\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2030 if (defined $ON_OTHER_INTERACT_2{$peptide}) { print OUT "$ON_OTHER_INTERACT_2{$peptide}\t"; } else { print OUT "\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2031 if (defined $notes_2{$peptide}) { print OUT "$notes_2{$peptide}\t"; } else { print OUT "\t"; }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2032
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2033 if (defined $domain_2{$peptide}) { push (@ppep_metadata, $domain_2{$peptide}); } else { push(@ppep_metadata, ""); }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2034 if (defined $ON_FUNCTION_2{$peptide}) { push (@ppep_metadata, $ON_FUNCTION_2{$peptide}); } else { push(@ppep_metadata, ""); }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2035 if (defined $ON_PROCESS_2{$peptide}) { push (@ppep_metadata, $ON_PROCESS_2{$peptide}); } else { push(@ppep_metadata, ""); }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2036 if (defined $ON_PROT_INTERACT_2{$peptide}) { push (@ppep_metadata, $ON_PROT_INTERACT_2{$peptide}); } else { push(@ppep_metadata, ""); }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2037 if (defined $ON_OTHER_INTERACT_2{$peptide}) { push (@ppep_metadata, $ON_OTHER_INTERACT_2{$peptide}); } else { push(@ppep_metadata, ""); }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2038 if (defined $notes_2{$peptide}) { push (@ppep_metadata, $notes_2{$peptide}); } else { push(@ppep_metadata, ""); }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2039
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2040 # begin store-to-SQLite "ppep_metadata" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2041 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2042 for $i (1..14) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2043 $ppep_metadata_stmth->bind_param($i, $ppep_metadata[$i-1]);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2044 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2045 if (not $ppep_metadata_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2046 print "Error writing ppep_metadata row for phosphopeptide $peptide: " . $ppep_metadata_stmth->errstr . "\n"; # method calls are not interpolated in strings; $i is stale after the bind loop, so report $peptide
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2047 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2048 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2049 # end store-to-SQLite "ppep_metadata" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2050
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2051 # Print the data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2052 @tmp_data = ();
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2053 foreach (@{$data{$peptide}}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2054 push(@tmp_data, $_);
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2055 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2056 print OUT join("\t", @tmp_data), "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2057
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2058 # begin store-to-SQLite "ppep_intensity" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2059 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2060 # commit the sample intensities
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2061 $i = 0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2062 foreach (@{$data{$peptide}}) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2063 my $intense = $_;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2064 $ppep_intensity_stmth->bind_param( 1, $ppep_id );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2065 $ppep_intensity_stmth->bind_param( 2, $sample_id_lut{$samples[$i]} );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2066 $ppep_intensity_stmth->bind_param( 3, $intense );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2067 if (not $ppep_intensity_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2068 print "Error writing tuple ($peptide,$samples[$i],$intense): " . $ppep_intensity_stmth->errstr . "\n"; # method calls are not interpolated in strings, so concatenate errstr
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2069 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2070 $i += 1;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2071 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2072 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2073 # end store-to-SQLite "ppep_intensity" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2074
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2075 # print the kinase-substrate data
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2076 for my $i (0 .. $#kinases_observed) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2077 if (exists($kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2078 print OUT "X\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2079 my $NetworKIN_label = $kinases_observed[$i]."_NetworKIN";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2080 print MELT "$peptide\t$gene_names\t$site_description{$SITE_KINASE_SUBSTRATE}\t$NetworKIN_label\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2081 # begin store-to-SQLite "ppep_gene_site" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2082 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2083 $ppep_gene_site_stmth->bind_param(1, $ppep_id); # ppep_gene_site.ppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2084 $ppep_gene_site_stmth->bind_param(2, $gene_names); # ppep_gene_site.gene_names
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2085 $ppep_gene_site_stmth->bind_param(3, $NetworKIN_label); # ppep_gene_site.kinase_map
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2086 $ppep_gene_site_stmth->bind_param(4, $SITE_KINASE_SUBSTRATE); # ppep_gene_site.site_type_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2087 if (not $ppep_gene_site_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2088 print "Error writing tuple ($peptide,$gene_names,$kinases_observed[$i]): " . $ppep_gene_site_stmth->errstr . "\n"; # method calls are not interpolated in strings, so concatenate errstr
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2089 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2090 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2091 # end store-to-SQLite "ppep_gene_site" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2092 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2093 else { print OUT "\t";}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2094 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2095 my %wrote_motif;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2096 my $motif_parts_0;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2097 for my $i (0 .. $#motif_sequence) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2098 if (exists($kinase_motif_matches{$peptide}{$motif_sequence[$i]})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2099 print OUT "X\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2100 $motif_parts_0 = $motif_type{$motif_sequence[$i]}." ".$motif_sequence[$i];
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2101 my $key = "$peptide\t$gene_names\t$motif_parts_0";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2102 if (!exists($wrote_motif{$key})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2103 $wrote_motif{$key} = $key;
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2104 print MELT "$peptide\t$gene_names\t$site_description{$SITE_MOTIF}\t$motif_parts_0\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2105 # print "Line 657: i is $i\t$kinase_motif_matches{$peptide}{$motif_sequence[$i]}\n"; #debug
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2106 # begin store-to-SQLite "ppep_gene_site" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2107 # ---
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2108 $ppep_gene_site_stmth->bind_param(1, $ppep_id); # ppep_gene_site.ppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2109 $ppep_gene_site_stmth->bind_param(2, $gene_names); # ppep_gene_site.gene_names
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2110 $ppep_gene_site_stmth->bind_param(3, $motif_parts_0); # ppep_gene_site.kinase_map
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2111 $ppep_gene_site_stmth->bind_param(4, $SITE_MOTIF); # ppep_gene_site.site_type_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2112 if (not $ppep_gene_site_stmth->execute()) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2113 print "Error writing tuple ($peptide,$gene_names,$motif_parts_0): " . $ppep_gene_site_stmth->errstr . "\n"; # method calls are not interpolated in strings, so concatenate errstr
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2114 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2115 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2116 # end store-to-SQLite "ppep_gene_site" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2117 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2118 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2119 else { print OUT "\t";}
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2120 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2121 for my $i (0 .. $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2122 if (exists($kinase_substrate_PhosphoSite_matches{$peptide}{$kinases_PhosphoSite[$i]})) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2123 print MELT "$peptide\t$gene_names\t$site_description{$SITE_PHOSPHOSITE}\t$phosphosites_observed_lbl[$i]\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2124 if ($i < $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2125 print OUT "X\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2126 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2127 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2128 print OUT "X\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2129 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2130 # begin store-to-SQLite "ppep_gene_site" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2131 # --- bind the four placeholders of the prepared statement, then execute it for the current site
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2132 $ppep_gene_site_stmth->bind_param(1, $ppep_id); # ppep_gene_site.ppep_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2133 $ppep_gene_site_stmth->bind_param(2, $gene_names); # ppep_gene_site.gene_names
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2134 $ppep_gene_site_stmth->bind_param(3, $phosphosites_observed_lbl[$i]); # ppep_gene_site.kinase_map
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2135 $ppep_gene_site_stmth->bind_param(4, $SITE_PHOSPHOSITE); # ppep_gene_site.site_type_id
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2136 if (not $ppep_gene_site_stmth->execute()) { # a false return is reported on STDOUT and processing continues
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2137 print "Error writing tuple ($peptide,$gene_names,$phosphosites_observed_lbl[$i]): " . $ppep_gene_site_stmth->errstr . "\n"; # errstr is concatenated because method calls are not interpolated inside double-quoted strings
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2138 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2139 # ...
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2140 # end store-to-SQLite "ppep_gene_site" table
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2141 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2142 else {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2143 if ($i < $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2144 print OUT "\t";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2145 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2146 elsif ($i == $#kinases_PhosphoSite) {
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2147 print OUT "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2148 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2149 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2150 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2151 }
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2152
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2153 close OUT; # finished writing OUT; NOTE(review): the close status is not checked, so a failed final flush would go unreported
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2154 close MELT; # finished writing MELT; same unchecked-close caveat as OUT
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2155 $ppep_gene_site_stmth->finish; # release the ppep_gene_site statement handle before the connection is closed
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2156 print "begin DB commit at " . format_localtime_iso8601() . "\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2157 $dbh->{AutoCommit} = $auto_commit if ( defined $dbh ); # guarded: assigning through an undefined $dbh would autovivify it and defeat the check on the next line; presumably this restores a saved AutoCommit setting and thereby triggers the commit announced above - TODO confirm where $auto_commit is set
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2158 $dbh->disconnect if ( defined $dbh );
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2159
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2160 print "\nFinished writing output at " . format_localtime_iso8601() ."\n\n";
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2161
ba5f14c2a4af "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents: 7
diff changeset
2162 ###############################################################################################################################