Mercurial > repos > proteore > proteore_reactome
annotate reactome_analysis.py @ 6:eb051cba2bda draft default tip
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
| author | proteore |
|---|---|
| date | Mon, 10 May 2021 15:22:24 +0000 |
| parents | 34097acf457c |
| children |
| rev | line source |
|---|---|
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
1 import argparse |
|
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
2 import csv |
|
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
3 import json |
|
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
4 import os |
|
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
5 import re |
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
6 |
|
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
# Absolute directory containing this script; used to locate files shipped
# next to it.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
8 |
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
9 |
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
def id_valid(identifiers):
    """
    Split identifiers into syntactically valid and invalid ones

    Only the part of each entry located before the first ";" is checked.
    An ID is valid when it is made exclusively of ASCII letters, digits,
    "_" and "-" (an empty ID is still accepted, as it always was).

    :param identifiers: iterable of identifier strings
    :return: tuple (valid, invalid) of two lists, both in input order
    """
    # "\Z" anchors at the very end of the string; "$" would also accept
    # an ID followed by a trailing newline.
    valid_pattern = re.compile(r"[A-Za-z0-9_-]*\Z")
    res = []
    remove = []
    for identifier in identifiers:
        identifier = identifier.split(";")[0]
        if valid_pattern.match(identifier):
            res.append(identifier)
        else:
            remove.append(identifier)
    return res, remove
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
23 |
|
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
24 |
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
def isnumber(format, n):
    """
    Check if a string is a well-formed number

    :param format: "int" or "float"; any other value yields False
    :param n: string to check
    :return: True if n matches the requested format, False otherwise

    "int" accepts an optional "-" followed by digits with no leading zero
    (so "0" is rejected, unchanged from the historical behaviour).
    "float" accepts an optional "-", an integer part ("0" or digits with
    no leading zero) and an optional fractional part introduced by ".".
    """
    # "\Z" instead of "$": "$" also matches right before a trailing "\n".
    int_format = re.compile(r"-?[1-9][0-9]*\Z")
    # The decimal point is escaped; an unescaped "." matches any character
    # and let strings such as "1a5" through.
    float_format = re.compile(r"-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?\Z")
    if format == "int":
        pattern = int_format
    elif format == "float":
        pattern = float_format
    else:
        return False
    return pattern.match(n) is not None
|
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
40 |
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
41 |
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
def _flatten_ids(entries):
    """
    Split every entry on ";" and return the stripped, non-empty parts
    """
    parts = []
    for entry in entries:
        for part in entry.split(";"):
            part = part.strip()
            if part:
                parts.append(part)
    return parts


def _post_to_reactome(payload):
    """
    POST newline-separated IDs to the Reactome analysis service with curl

    Return the raw response body, or "" when curl cannot be started.
    """
    # Local import so that this block does not depend on the module header.
    import subprocess

    # Argument list instead of a shell string: nothing coming from the
    # input is ever interpreted by a shell.
    command = [
        "curl",
        "-H", "Content-Type: text/plain",
        "-d", payload,
        "-X", "POST",
        "--url",
        "www.reactome.org/AnalysisService/identifiers/?pageSize=1&page=1",
    ]
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,  # curl's progress meter is discarded
            universal_newlines=True,
        )
    except OSError:
        return ""
    response, _ = process.communicate()
    return response


def data_json(identifiers):
    """
    Submit IDs list to Reactome and return results in json format

    :param identifiers: the values given to --json, either
        [ids, "list"] where ids is a whitespace separated string, or
        [path, "file", header, column] where path is a tab-separated
        file, header is "true" when the first row must be skipped and
        column is the 1-based column holding the IDs (e.g. "c1").
        Cells and list entries may hold several IDs separated by ";".
    :return: tuple (json_string, trash): the raw response of the service
        and the list of IDs rejected by id_valid (never submitted).
    :raises ValueError: if the input type or the column is not usable

    The response is returned even when it is not valid JSON so that the
    caller can report the failure; in that case no summary is printed.
    """
    source = identifiers[1]
    if source == "list":
        candidates = _flatten_ids(identifiers[0].split())
    elif source == "file":
        header = identifiers[2]
        ncol = identifiers[3].replace("c", "")
        # A negative column would silently index from the end of the row.
        if not isnumber("int", ncol) or int(ncol) < 1:
            raise ValueError("Invalid column: %s" % identifiers[3])
        index = int(ncol) - 1
        with open(identifiers[0], "r") as mq:
            rows = list(csv.reader(mq, delimiter="\t"))
        if header == "true":
            rows = rows[1:]
        # Blank or short rows have no cell in the requested column.
        cells = [row[index] for row in rows if len(row) > index]
        candidates = _flatten_ids(cells)
    else:
        raise ValueError("Unknown input type: %s" % source)

    # Validate once; only IDs made of [A-Za-z0-9_-] are submitted.
    valid, trash = id_valid(candidates)
    json_string = _post_to_reactome("\n".join(valid))

    try:
        summary = json.loads(json_string)
        print("Identifiers not found: " + str(summary["identifiersNotFound"]))
        print("Pathways found: " + str(summary["pathwaysFound"]))
    except (ValueError, KeyError, TypeError):
        # Unusable response: leave the error reporting to the caller.
        pass
    return json_string, trash
|
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
77 |
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
78 |
|
1
8200968789c1
planemo upload commit 1b95e5bc85662f10cdd6305587ccee8faf9a2354-dirty
proteore
parents:
0
diff
changeset
|
def write_output(filename, json_string, species, trash_file, trash):
    """
    Replace json result in template and print to output

    :param filename: path of the HTML file to write
    :param json_string: raw response of the Reactome analysis service
    :param species: value substituted for "{species}" in the template
    :param trash_file: path of the file receiving the rejected IDs
    :param trash: list of rejected IDs; nothing is written when empty

    Lines of template.html (located in CURRENT_DIR) containing "{token}"
    get "{species}" and "{token}" replaced; the token is read from
    json_string["summary"]["token"]. If the token cannot be read, the
    output file contains only an error message instead of a partially
    rendered template.
    """
    rendered = []
    token = None
    service_ok = True
    with open(os.path.join(CURRENT_DIR, "template.html")) as template:
        for line in template:
            if "{token}" in line:
                if token is None:
                    # Parsed lazily and only once. A missing key (error
                    # reply from the service) is handled like invalid JSON.
                    try:
                        token = json.loads(json_string)["summary"]["token"]
                    except (ValueError, KeyError, TypeError):
                        service_ok = False
                        break
                line = line.replace("{species}", species)
                line = line.replace("{token}", token)
            rendered.append(line)

    with open(filename, "w") as output:
        if service_ok:
            output.write("".join(rendered))
        else:
            output.write(
                "An error occurred due to unavailability of Reactome web "
                "service. Please return later."
            )

    if trash:
        with open(trash_file, "w") as trash_out:
            trash_out.write("\n".join(trash))
|
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
101 |
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
102 |
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
def options(argv=None):
    """
    Parse the command line, query Reactome and write the result files

    :param argv: list of arguments to parse; when None (the default)
        argparse reads them from sys.argv[1:]
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--json", nargs="+", required=True,
        help="IDs followed by 'list', or a tab-separated file path "
             "followed by 'file', the header flag and the column (c1...)")
    parser.add_argument("--output", default="output.html",
                        help="HTML file to write")
    parser.add_argument("--trash", default="trash.txt",
                        help="file receiving the rejected IDs")
    parser.add_argument("--species", default="48887",
                        help="species value inserted in the template")
    args = parser.parse_args(argv)

    json_string, trash = data_json(args.json)
    write_output(args.output, json_string, args.species, args.trash, trash)
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
113 |
|
6
eb051cba2bda
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
proteore
parents:
5
diff
changeset
|
114 |
|
0
19d8daa1eb2e
planemo upload commit 170560760f17fd1b77efe8bb95fedf3eb2433f0b-dirty
proteore
parents:
diff
changeset
|
if __name__ == "__main__":
    # Run the command-line workflow only when executed as a script.
    options()
