Mercurial > repos > tduigou > domestication
annotate domestication.py @ 0:8fd9af4cb080 draft
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
| author | tduigou |
|---|---|
| date | Mon, 12 May 2025 10:21:24 +0000 |
| parents | |
| children | f78651af72e4 |
| rev | line source |
|---|---|
|
0
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
1 import os |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
2 import argparse |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
3 import pandas |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
4 import zipfile |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
5 import genedom |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
6 import dnacauldron |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
7 import proglog |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
8 import shutil |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
9 #proglog.notebook() |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
10 from Bio.SeqRecord import SeqRecord |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
11 from Bio.Seq import Seq |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
12 from Bio import SeqIO |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
13 |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
def _parse_file_name_mapping(file_name_mapping):
    """Build a ``{record_id: real_name}`` dict from the file-name mapping.

    ``file_name_mapping`` may be ``None``/empty (no renaming), a dict, or a
    comma-separated string of ``path:original_name`` items. The key of the
    result is the base name of ``path`` without its extension; the value is
    ``original_name`` with a trailing ``.gb`` removed.

    Raises:
        ValueError: if a string item contains no ``:`` separator.
    """
    if not file_name_mapping:
        return {}

    if isinstance(file_name_mapping, str):
        pairs = []
        for item in file_name_mapping.split(","):
            if not item.strip():
                # Tolerate trailing or doubled commas.
                continue
            # Split on the first colon only, so a colon inside the original
            # name no longer aborts the whole run.
            path, separator, name = item.partition(":")
            if not separator:
                raise ValueError(
                    "Invalid file_name_mapping item %r: expected 'path:name'"
                    % item
                )
            pairs.append((path, name))
    else:
        pairs = list(file_name_mapping.items())

    real_names = {}
    for path, name in pairs:
        # Only drop ".gb" when it is the extension, not wherever it occurs.
        if name.endswith(".gb"):
            name = name[: -len(".gb")]
        record_id = os.path.splitext(os.path.basename(path))[0]
        real_names[record_id] = name
    return real_names


def _read_order_ids(output_dom):
    """Load ``order_ids.csv`` from the domestication output (zip or folder)."""
    if isinstance(output_dom, str) and output_dom.endswith(".zip"):
        with zipfile.ZipFile(output_dom, "r") as zipf, \
                zipf.open("order_ids.csv") as handle:
            return pandas.read_csv(handle)
    return pandas.read_csv(os.path.join(output_dom, "order_ids.csv"))


def _report_changed_names(order_ids):
    """Print every row whose ``sequence`` and ``order_id`` columns differ."""
    any_truncated = False
    for _, row in order_ids.iterrows():
        if row["sequence"] != row["order_id"]:
            any_truncated = True
            print("Changed names:", end=" ")
            # str() keeps the report working if a cell is not a string.
            print(" --> ".join(str(value) for value in row))
    if not any_truncated:
        print("Part names were not truncated")


def domestication(files_to_domestication, csv_file,file_name_mapping, use_file_names_as_id,
                  allow_edits, output_dom):
    """Run a genedom batch domestication and report renamed parts.

    Args:
        files_to_domestication: comma-separated string of sequence file paths.
        csv_file: path of the CSV passed to
            ``GoldenGateDomesticator.standard_from_spreadsheet``.
        file_name_mapping: ``None``, a dict, or a comma-separated string of
            ``path:original_name`` items used to rename the loaded records.
        use_file_names_as_id: forwarded to ``load_records_from_files``.
        allow_edits: forwarded to ``genedom.batch_domestication``.
        output_dom: target of the batch domestication (a ``.zip`` path or a
            folder path).

    Returns:
        ``output_dom``, unchanged.

    Raises:
        ValueError: if ``file_name_mapping`` is a string with an item that
            has no ``:`` separator.
    """
    # Validate the mapping first so malformed input fails before any
    # sequence file is loaded.
    real_names = _parse_file_name_mapping(file_name_mapping)

    records_to_domesticate = dnacauldron.biotools.load_records_from_files(
        files=files_to_domestication.split(","),
        folder=None,
        use_file_names_as_ids=use_file_names_as_id,
    )

    # Replace the loader-assigned ids by the mapped names where available.
    updated_records = []
    for record in records_to_domesticate:
        if record.id in real_names:
            record.id = real_names[record.id]
        updated_records.append(record)

    standard = genedom.GoldenGateDomesticator.standard_from_spreadsheet(
        dataframe=pandas.read_csv(csv_file)
    )
    genedom.batch_domestication(
        records=updated_records,
        standard=standard,
        allow_edits=allow_edits,
        target=output_dom,
    )

    # Check if any names were changed in the generated order_ids.csv.
    _report_changed_names(_read_order_ids(output_dom))

    # Zip compressing of a folder output.
    if os.path.isdir(output_dom):
        # Use the same stripped base name for creating and locating the
        # archive; a trailing slash previously made the two paths disagree.
        base_name = output_dom.rstrip("/\\")
        zip_path = shutil.make_archive(base_name, "zip", output_dom)
        # NOTE(review): output_dom is an existing directory here, so
        # shutil.move places the archive inside it -- confirm this is the
        # intended final layout.
        shutil.move(zip_path, output_dom)

    return output_dom
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
75 |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
def methylation_protection(domestication_target, output_methprot):
    """Write flanked copies of the domesticated GenBank records.

    Each record found in the ``domesticated_genbanks`` folder of
    ``domestication_target`` is prefixed with ``TTC`` and suffixed with
    ``GAA``, then written as ``<record id>.gb`` into ``output_methprot``.

    Args:
        domestication_target: path of the domestication output. When it ends
            with ``.zip`` the ``domesticated_genbanks/*.gb`` members are first
            extracted into a local ``extracted_genbanks`` folder; otherwise
            it is treated as a folder.
        output_methprot: folder receiving the protected GenBank files; it is
            created if missing.

    Raises:
        ValueError: if ``output_methprot`` is empty or ``None``.
    """
    # Fail early with a clear message instead of a late TypeError from
    # os.path.join (the CLI flag for this path is optional).
    if not output_methprot:
        raise ValueError(
            "output_methprot is required when methylation protection is enabled"
        )

    if domestication_target.endswith(".zip"):
        extracted_dir = "extracted_genbanks"
        os.makedirs(extracted_dir, exist_ok=True)
        with zipfile.ZipFile(domestication_target, "r") as zipf:
            for member in zipf.namelist():
                if member.startswith("domesticated_genbanks/") and member.endswith(".gb"):
                    zipf.extract(member, path=extracted_dir)
        gb_folder = os.path.join(extracted_dir, "domesticated_genbanks")
    else:
        gb_folder = os.path.join(domestication_target, "domesticated_genbanks")

    records_to_protect = dnacauldron.biotools.load_records_from_files(
        folder=gb_folder, use_file_names_as_ids=True
    )

    os.makedirs(output_methprot, exist_ok=True)

    for record in records_to_protect:
        # These flanking sequences are designed to prevent Dcm recognition
        # site formation. They are wrapped in Seq because SeqRecord documents
        # its first argument as a Seq object, not a plain str.
        new_seqrecord = SeqRecord(Seq("TTC")) + record + SeqRecord(Seq("GAA"))
        new_seqrecord.id = record.id
        new_seqrecord.name = record.name
        new_seqrecord.annotations = {"molecule_type": "DNA"}

        target_file = os.path.join(output_methprot, new_seqrecord.id + ".gb")
        with open(target_file, "w") as output_handle:
            SeqIO.write(new_seqrecord, output_handle, "genbank")
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
100 |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
def _str_to_bool(value):
    """Return True only for the string "true" in any letter case."""
    return value.lower() == 'true'


def parse_command_line_args(argv=None):
    """Parse the command-line options of the domestication script.

    Args:
        argv: optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    parser = argparse.ArgumentParser(description="Domestication")

    parser.add_argument("--files_to_domestication", required=True,
                        help="List of GenBank files (Comma-separated)")
    parser.add_argument("--csv_file", required=True,
                        help="csv file")
    parser.add_argument('--file_name_mapping', type=str,
                        help='Mapping of Galaxy filenames to original filenames')
    # Boolean options take an explicit value; any text other than "true"
    # (case-insensitive) is read as False.
    parser.add_argument("--use_file_names_as_id", type=_str_to_bool, default=True,
                        help="Use file names as IDs (True/False)")
    parser.add_argument("--allow_edits", type=_str_to_bool, default=True,
                        help="Allow sequence edits")
    parser.add_argument("--output_dom", required=True,
                        help="zip output for domestication results")
    parser.add_argument("--output_methprot", required=False,
                        help="gb output for methylation protection")
    parser.add_argument("--methylation_protection", type=_str_to_bool, default=False,
                        help="Enable methyl protection (true/false)")

    return parser.parse_args(argv)
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
122 |
|
8fd9af4cb080
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: read the CLI options, always run the
    # domestication step, then optionally the methylation protection step.
    cli = parse_command_line_args()

    domestication(
        files_to_domestication=cli.files_to_domestication,
        csv_file=cli.csv_file,
        file_name_mapping=cli.file_name_mapping,
        use_file_names_as_id=cli.use_file_names_as_id,
        allow_edits=cli.allow_edits,
        output_dom=cli.output_dom,
    )

    # The protection step works on the domestication output.
    if cli.methylation_protection:
        methylation_protection(cli.output_dom, cli.output_methprot)
