annotate reactome_analysis.py @ 6:eb051cba2bda draft default tip

"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
author proteore
date Mon, 10 May 2021 15:22:24 +0000
parents 34097acf457c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
1 import argparse
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
2 import csv
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
3 import json
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
4 import os
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
5 import re
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
6
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
7 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
8
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
9
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
def id_valid(identifiers):
    """
    Split identifiers into those made only of safe characters and the rest.

    Only the part of each entry before the first ";" is examined. An entry
    is accepted when that part consists solely of ASCII letters, digits,
    "_" or "-" (an empty part is accepted too).

    Returns a tuple (accepted, rejected) of two lists, each holding the
    examined parts in input order.
    """
    allowed = re.compile("^[A-Za-z0-9_-]*$")
    accepted, rejected = [], []
    for entry in identifiers:
        first_part = entry.split(";")[0]
        # Route the candidate to whichever list it belongs in.
        bucket = accepted if allowed.match(first_part) else rejected
        bucket.append(first_part)
    return accepted, rejected
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
23
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
24
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
def isnumber(format, n):
    """
    Check whether the text ``n`` looks like a number of the given kind.

    format: "int" or "float"; any other value makes the function return
        False.
    n: the text to test. Non-string values are converted with str() first.

    "int" accepts an optional leading "-" followed by digits that do not
    start with 0 (so "0" and "007" are rejected, as before).
    "float" accepts an optional "-", an integer part ("0" or digits not
    starting with 0) and an optional fractional part introduced by a
    literal ".".

    Returns True when ``n`` matches the requested format, else False.
    """
    if not isinstance(n, str):
        n = str(n)
    # The decimal point is escaped: an unescaped "." would match any
    # character and let strings such as "1a5" pass as floats.
    float_format = re.compile(r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$")
    int_format = re.compile(r"^[-]?[1-9][0-9]*$")
    if format == "int":
        return int_format.match(n) is not None
    if format == "float":
        return float_format.match(n) is not None
    return False
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
40
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
41
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
def _split_ids(cells):
    """Flatten ";"-separated cells into a list of stripped, non-empty IDs."""
    return [
        item.strip()
        for cell in cells
        for item in cell.split(";")
        if item.strip()
    ]


def _read_ids_from_file(path, header, ncol):
    """Read the IDs found in column ``ncol`` ("cN", 1-based) of a TSV file."""
    column = ncol.replace("c", "")
    if not isnumber("int", column) or int(column) < 1:
        raise ValueError("Invalid column specifier: %r" % ncol)
    index = int(column) - 1
    with open(path, "r") as handle:
        rows = list(csv.reader(handle, delimiter="\t"))
    if header == "true":
        rows = rows[1:]  # first row holds column titles, not identifiers
    # Rows too short to contain the requested column (e.g. blank lines)
    # are skipped instead of raising IndexError.
    return _split_ids(row[index] for row in rows if len(row) > index)


def _post_to_reactome(ids):
    """POST newline-separated IDs to Reactome via curl; return the body."""
    import subprocess  # local import keeps this block self-contained

    # An argument list (no shell) means the identifiers are never
    # interpreted by a shell, so no quoting or escaping is required.
    command = [
        "curl",
        "-H", "Content-Type: text/plain",
        "-d", ids,
        "-X", "POST",
        "--url",
        "www.reactome.org/AnalysisService/identifiers/?pageSize=1&page=1",
    ]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # discard curl's progress meter
            universal_newlines=True,
        )
    except OSError as err:
        # curl could not be started; an empty body lets the caller report
        # the failure the same way as an unusable response.
        print("Could not run curl: " + str(err))
        return ""
    return completed.stdout


def data_json(identifiers):
    """
    Submit IDs list to Reactome and return results in json format

    identifiers: sequence whose second item selects the input mode.
        "list": the first item is a whitespace-separated string of IDs.
        "file": the first item is the path of a tab-separated file, the
        third item is "true" when the file has a header row, and the
        fourth item names the ID column as "cN" (1-based).
    Cells and list entries may hold several IDs separated by ";".

    Only IDs accepted by id_valid() are sent to Reactome; the rejected
    ones are returned so the caller can report them.

    Returns a tuple (json_string, trash): the raw response text and the
    list of rejected IDs. When the response is not usable JSON the text is
    still returned unchanged so that the caller can produce its own error
    output.

    Raises ValueError for an unknown input mode or an invalid column.
    """
    mode = identifiers[1]
    if mode == "list":
        candidates = _split_ids(identifiers[0].split())
    elif mode == "file":
        candidates = _read_ids_from_file(
            identifiers[0], identifiers[2], identifiers[3]
        )
    else:
        raise ValueError("Unknown identifiers input mode: %r" % mode)

    # Validate once and reuse both halves of the result.
    valid, trash = id_valid(candidates)
    json_string = _post_to_reactome("\n".join(valid))

    try:
        j = json.loads(json_string)
        print("Identifiers not found: " + str(j["identifiersNotFound"]))
        print("Pathways found: " + str(j["pathwaysFound"]))
    except (ValueError, KeyError, TypeError):
        # Not JSON, or JSON without the summary counters: skip the summary
        # and hand the raw text back to the caller.
        print("Reactome did not return a usable analysis summary.")
    return json_string, trash
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
77
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
78
1
8200968789c1 planemo upload commit 1b95e5bc85662f10cdd6305587ccee8faf9a2354-dirty
proteore
parents: 0
diff changeset
def write_output(filename, json_string, species, trash_file, trash):
    """
    Replace json result in template and print to output

    filename: path of the HTML file to write.
    json_string: raw text returned by the Reactome analysis request; its
        ["summary"]["token"] value is inserted into the template.
    species: text substituted for "{species}" on template lines that also
        contain "{token}".
    trash_file: path of the file receiving rejected identifiers.
    trash: list of rejected identifiers; trash_file is written only when
        this list is non-empty.

    When json_string is not valid JSON or carries no summary token, the
    output file contains only an error message instead of the template.
    """
    # Extract the token once, before any output is produced, so a bad
    # response never leaves a half-written template behind.
    try:
        token = json.loads(json_string)["summary"]["token"]
    except (ValueError, KeyError, TypeError):
        token = None

    with open(filename, "w") as output:
        if token is None:
            output.write("An error occurred due to unavailability of Reactome web service. Please return later.")  # noqa 501
        else:
            template_path = os.path.join(CURRENT_DIR, "template.html")
            with open(template_path) as template:
                for line in template:
                    if "{token}" in line:
                        line = line.replace("{species}", species)
                        line = line.replace("{token}", token)
                    output.write(line)

    if trash:
        with open(trash_file, "w") as trash_out:
            trash_out.write("\n".join(trash))
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
101
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
102
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
def options():
    """
    Parse the command line, query Reactome and write the result files.

    --json is required and takes one or more values that are passed as a
    list to data_json(). --output, --trash and --species are optional and
    are forwarded to write_output().
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--json", nargs="+", required=True)
    # The remaining options differ only in their flag and default value.
    for flag, fallback in (
        ("--output", "output.html"),
        ("--trash", "trash.txt"),
        ("--species", "48887"),
    ):
        cli.add_argument(flag, default=fallback)
    parsed = cli.parse_args()
    json_string, trash = data_json(parsed.json)
    write_output(
        parsed.output, json_string, parsed.species, parsed.trash, trash
    )
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
113
6
eb051cba2bda "planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents: 5
diff changeset
114
0
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
115 if __name__ == "__main__":
19d8daa1eb2e planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff changeset
116 options()