Mercurial > repos > iuc > ena_webin_cli
annotate process_input.py @ 0:7f669682f4ac draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
| author | iuc |
|---|---|
| date | Mon, 06 Oct 2025 12:13:07 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
1 import json |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
2 import os |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
3 import sys |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
4 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
5 import yaml |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
6 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
7 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
def get_section_string(f, start_line, end_line, return_string=False):
    """Return the lines found between two delimiter lines of an open file.

    Lines are consumed from the current position of ``f`` up to and
    including the first line equal to ``start_line``.  The lines that follow
    are then collected until a line equal to ``end_line`` is read (that line
    is consumed but not returned) or the end of the file is reached.

    Delimiters are compared to whole lines exactly as ``f.readline()``
    returns them, so they normally have to include the trailing newline.

    :param f: object providing a ``readline()`` method returning ``str``
    :param start_line: line marking the beginning of the section
    :param end_line: line marking the end of the section
    :param return_string: when true, return the section joined into a single
        string instead of a list of lines
    :return: the section as a list of lines, or as one string; empty when
        ``start_line`` is never found
    """
    # readline() returns '' forever once EOF is reached, so '' is used as the
    # iteration sentinel: a missing delimiter ends the scan instead of
    # looping endlessly.
    for line in iter(f.readline, ''):
        if line == start_line:
            break

    section_lines = []
    for line in iter(f.readline, ''):
        if line == end_line:
            break
        section_lines.append(line)

    if return_string:
        return ''.join(section_lines)
    return section_lines
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
18 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
19 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
def fill_from_yaml_data(yaml_only_dict, studies_samples_dict):
    """Record the platform of every experiment under its study and sample.

    ``studies_samples_dict`` is updated in place so that it has the shape
    ``{study_alias: {sample_alias: {'experiments': [{'platform': ...}]}}}``,
    with one list entry appended per experiment.

    :param yaml_only_dict: mapping with an ``'ENA_experiment'`` entry whose
        values each provide ``'study_alias'``, ``'sample_alias'`` and
        ``'platform'``
    :param studies_samples_dict: nested dict to fill; modified in place
    :raises KeyError: if one of the expected keys is missing
    """
    # fill experiment information (platform)
    for exp in yaml_only_dict['ENA_experiment'].values():
        study_alias = exp['study_alias']
        sample_alias = exp['sample_alias']
        # Build the entry before touching the target dict so that a missing
        # key does not leave a half-created study/sample behind.
        experiment = {'platform': exp['platform']}

        samples = studies_samples_dict.setdefault(study_alias, {})
        sample = samples.setdefault(sample_alias, {'experiments': []})
        sample['experiments'].append(experiment)
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
34 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
35 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
def load_receipt_data(input_file_path):
    """Load study, sample and experiment information from a receipt file.

    Three sections of the file are read, each in its own pass:

    * the YAML block enclosed by two ``YAML -------------`` lines, which
      provides the experiments (see ``fill_from_yaml_data``);
    * the tab-separated lines following ``Study accession details:``;
    * the tab-separated lines following ``Sample accession details:``.

    Both accession sections end at the first empty line.  The first two
    columns of each accession line are used as alias and accession.

    :param input_file_path: path of the receipt file
    :return: dict shaped as
        ``{study_alias: {'accession': ..., sample_alias:
        {'accession': ..., 'experiments': [{'platform': ...}]}}}``;
        an ``'accession'`` key is only present when the alias was listed in
        the matching accession section
    """
    # should do some health check of the input file?
    loaded_data = {}

    # load yaml section
    yaml_delimiter = 'YAML -------------\n'
    with open(input_file_path) as input_file:
        yaml_only_section = yaml.safe_load(
            get_section_string(input_file, start_line=yaml_delimiter, end_line=yaml_delimiter, return_string=True)
        )
    fill_from_yaml_data(yaml_only_section, loaded_data)

    end_line = '\n'

    # read study accessions
    study_delimiter = 'Study accession details:\n'
    with open(input_file_path) as input_file:
        studies_accession_lines = get_section_string(input_file, start_line=study_delimiter, end_line=end_line)

    for study_line in studies_accession_lines:
        # Drop the line terminator first: on a two-column line it would
        # otherwise end up inside the accession.
        fields = study_line.rstrip('\n').split('\t')
        if len(fields) < 2:
            # blank or malformed line: nothing to record
            continue
        alias, accession = fields[:2]
        try:
            loaded_data[alias]['accession'] = accession
        except KeyError:
            print(f"Study {alias} is not referenced by any experiment")

    # read sample accessions
    samples_delimiter = 'Sample accession details:\n'
    with open(input_file_path) as input_file:
        samples_accession_lines = get_section_string(input_file, start_line=samples_delimiter, end_line=end_line)

    for sample_line in samples_accession_lines:
        fields = sample_line.rstrip('\n').split('\t')
        if len(fields) < 2:
            continue
        alias, accession = fields[:2]
        # The receipt does not say which study a sample belongs to, so the
        # accession is attached to the first study that knows the alias.
        for samples in loaded_data.values():
            if alias in samples:
                samples[alias]['accession'] = accession
                break

    return loaded_data
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
73 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
74 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
def _strip_fasta_suffix(file_name):
    """Return ``file_name`` without a trailing '.fasta.gz' or '.fasta'."""
    for suffix in ('.fasta.gz', '.fasta'):
        if file_name.endswith(suffix):
            return file_name[:-len(suffix)]
    # Unknown extension: keep the name rather than chopping arbitrary
    # characters off its end.
    return file_name


def _write_manifest(manifest_path, manifest_template, sample_alias, platform, study_accession, sample_accession):
    """Write one manifest: the template contents followed by sample fields."""
    with open(manifest_path, "w") as output_handle:
        # dump the contents of manifest template containing global vars
        with open(manifest_template) as m_template:
            output_handle.write(m_template.read())

        output_handle.write("ASSEMBLYNAME\tconsensus_" + sample_alias + "\n")
        output_handle.write("PLATFORM\t" + platform + "\n")
        output_handle.write("STUDY\t" + study_accession + "\n")
        output_handle.write("SAMPLE\t" + sample_accession + "\n")
        output_handle.write("FASTA\t" + sample_alias + '.fasta.gz' + "\n")
        # AGP and chromosome list entries are only added when a matching
        # file exists in ./fasta
        agp_path = os.path.join("./fasta", sample_alias + ".agp")
        if os.path.exists(agp_path):
            output_handle.write("AGP\t" + sample_alias + ".agp\n")
        chr_list_path = os.path.join("./fasta", sample_alias + ".tsv.gz")
        if os.path.exists(chr_list_path):
            output_handle.write("CHROMOSOME_LIST\t" + sample_alias + ".tsv.gz\n")


def main():
    """Write one manifest per FASTA file that has metadata in the receipt.

    Command line arguments, in order:

    1. path of the receipt file (see ``load_receipt_data``);
    2. path of a JSON file holding the list of FASTA file names;
    3. directory in which the ``<sample_alias>.manifest.txt`` files are
       written;
    4. path of the manifest template copied at the top of every manifest.

    The path of every manifest written is also appended, one per line, to
    ``submit_list.tab`` in the current working directory.  FASTA files whose
    sample alias is not found in the receipt are reported on stdout and
    skipped.
    """
    if len(sys.argv) < 5:
        sys.exit(
            f'Usage: {sys.argv[0]} <receipt_file> <fasta_names_json> <manifest_out_dir> <manifest_template>'
        )
    input_file_path, fasta_names_list_path, out_manifest_base, manifest_template = sys.argv[1:5]

    # load submitted data from receipt file
    data_dict = load_receipt_data(input_file_path)

    # iterate over the list of fasta files
    with open(fasta_names_list_path, 'r') as fasta_files_json_file:
        fasta_files_list = json.load(fasta_files_json_file)

    with open('submit_list.tab', 'w') as written_manifests_out:
        for fasta_file in fasta_files_list:
            sample_alias = _strip_fasta_suffix(fasta_file)
            print(f'Processing {sample_alias}')

            # use the first study that lists this sample
            for study in data_dict.values():
                if sample_alias in study:
                    break
            else:
                print(f'No metadata found for sample {sample_alias}')
                continue

            sample = study[sample_alias]
            sample_accession = sample['accession']
            study_accession = study['accession']
            # TODO: get a string that concatenates platform information from multiple experiments
            platform = sample['experiments'][0]['platform']
            manifest_path = os.path.join(out_manifest_base, sample_alias + '.manifest.txt')

            _write_manifest(
                manifest_path, manifest_template, sample_alias, platform, study_accession, sample_accession
            )
            written_manifests_out.write(manifest_path + '\n')
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
129 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
130 |
|
7f669682f4ac
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
iuc
parents:
diff
changeset
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
