comparison tomo_setup.py @ 59:feb2a5fc7c76 draft

"planemo upload for repository https://github.com/rolfverberg/galaxytools commit 9a07ab3099737ee0d99e82739b55048f89c36bc6"
author rv43
date Tue, 16 Aug 2022 16:55:50 +0000
parents bead50a4eadc
children 52db7707ff48
comparison
equal deleted inserted replaced
58:072a3637117e 59:feb2a5fc7c76
17 def __main__(): 17 def __main__():
18 18
19 # Parse command line arguments 19 # Parse command line arguments
20 parser = argparse.ArgumentParser( 20 parser = argparse.ArgumentParser(
21 description='Setup tomography reconstruction') 21 description='Setup tomography reconstruction')
22 parser.add_argument('-i', '--inputfiles', 22 parser.add_argument('--inputconfig',
23 nargs='+', 23 default='inputconfig.txt',
24 help='Input config from tool form')
25 parser.add_argument('--inputfiles',
24 default='inputfiles.txt', 26 default='inputfiles.txt',
25 help='Input file datasets or collections') 27 help='Input file collections')
26 parser.add_argument('-c', '--config', 28 parser.add_argument('-c', '--config',
27 help='Input config') 29 help='Input config file')
30 parser.add_argument('--num_theta',
31 help='Number of theta angles')
32 parser.add_argument('--theta_range',
33 help='Theta range (lower bound, upper bound)')
34 parser.add_argument('--output_config',
35 help='Output config')
36 parser.add_argument('--output_data',
37 help='Preprocessed tomography data')
28 parser.add_argument('-l', '--log', 38 parser.add_argument('-l', '--log',
29 type=argparse.FileType('w'), 39 type=argparse.FileType('w'),
30 default=sys.stdout, 40 default=sys.stdout,
31 help='Log file') 41 help='Log file')
32 parser.add_argument('-t', '--inputfile_types',
33 nargs='+',
34 default='collection',
35 help='Input files type (collection or a list of set types: dark, bright, or data)')
36 parser.add_argument('--theta_range',
37 help='Theta range (lower bound, upper bound, number of angles)')
38 parser.add_argument('--output_config',
39 help='Output config')
40 parser.add_argument('--output_data',
41 help='Preprocessed tomography data')
42 parser.add_argument('tomo_ranges', metavar='N', type=int, nargs='+')
43 args = parser.parse_args() 42 args = parser.parse_args()
44 43
45 # Starting memory monitoring 44 # Starting memory monitoring
46 tracemalloc.start() 45 tracemalloc.start()
47 46
52 if not isinstance(level, int): 51 if not isinstance(level, int):
53 raise ValueError(f'Invalid log_level: {log_level}') 52 raise ValueError(f'Invalid log_level: {log_level}')
54 logging.basicConfig(format=logging_format, level=level, force=True, 53 logging.basicConfig(format=logging_format, level=level, force=True,
55 handlers=[logging.StreamHandler()]) 54 handlers=[logging.StreamHandler()])
56 55
57 logging.debug(f'config = {args.config}') 56 logging.info(f'config = {args.config}')
58 logging.debug(f'inputfiles = {args.inputfiles}') 57 logging.info(f'num_theta = {args.num_theta}')
59 logging.debug(f'inputfile_types = {args.inputfile_types}') 58 if args.theta_range is None:
60 logging.debug(f'log = {args.log}') 59 logging.info(f'theta_range = {args.theta_range}')
60 else:
61 logging.info(f'theta_range = {args.theta_range.split()}')
62 logging.info(f'output_config = {args.output_config}')
63 logging.info(f'output_data = {args.output_data}')
64 logging.info(f'log = {args.log}')
61 logging.debug(f'is log stdout? {args.log is sys.stdout}') 65 logging.debug(f'is log stdout? {args.log is sys.stdout}')
62 logging.debug(f'theta_range = {args.theta_range.split()}') 66
63 logging.debug(f'output_config = {args.output_config}') 67 # Read tool config input
64 logging.debug(f'output_data = {args.output_data}') 68 inputconfig = []
65 logging.debug(f'tomoranges = {args.tomo_ranges}') 69 with open(args.inputconfig) as f:
66 70 inputconfig = [line.strip() for line in f if line.strip() and not line.startswith('#')]
67 # Check input file type 71 assert(len(inputconfig) >= 6)
68 if isinstance(args.inputfile_types, list): 72 config_type = inputconfig[0]
69 if len(args.inputfile_types) == 1 and args.inputfile_types[0] == 'collection': 73 input_type = inputconfig[1]
70 if len(args.inputfiles) != 1 or args.inputfiles[0] != 'inputfiles.txt': 74 num_stack = int(inputconfig[2])
71 raise ValueError('Inconsistent inputfiles and inputfile_types:\n'+ 75 stack_types = [x.strip() for x in inputconfig[3].split()]
72 f'inputfiles ({type(inputfiles)}):\n{inputfiles}\n'+ 76 num_imgs = [int(x.strip()) for x in inputconfig[4].split()]
73 f'inputfile_types ({type(inputfile_types)}):\n{inputfile_types}') 77 img_offsets = [int(x.strip()) for x in inputconfig[5].split()]
74 input_as_collection = True 78 if config_type == 'config_manual':
79 assert(len(inputconfig) == 7)
80 ref_heights = [float(x.strip()) for x in inputconfig[6].split()]
81 else:
82 ref_heights = None
83 logging.info(f'config_type = {config_type} {type(config_type)}')
84 logging.info(f'input_type = {input_type} {type(input_type)}')
85 logging.info(f'num_stack = {num_stack} {type(num_stack)}')
86 logging.info(f'stack_types = {stack_types} {type(stack_types)}')
87 logging.info(f'num_imgs = {num_imgs} {type(num_imgs)}')
88 logging.info(f'img_offsets = {img_offsets} {type(img_offsets)}')
89 logging.info(f'ref_heights = {ref_heights} {type(ref_heights)}')
90
91 # Read input files and collect data files info
92 datasets = []
93 with open(args.inputfiles) as f:
94 for line in f:
95 if not line.strip() or line.startswith('#'):
96 continue
97 fields = [x.strip() for x in line.split('\t')]
98 filepath = fields[0]
99 element_identifier = fields[1] if len(fields) > 1 else fields[0].split('/')[-1]
100 datasets.append({'element_identifier' : fields[1], 'filepath' : filepath})
101 logging.debug(f'datasets:\n{datasets}')
102 print(f'datasets:\n{datasets}')
103
104 # Read and sort data files
105 collections = []
106 for dataset in datasets:
107 element_identifier = [x.strip() for x in dataset['element_identifier'].split('_')]
108 if len(element_identifier) > 1:
109 name = element_identifier[0]
75 else: 110 else:
76 if len(args.inputfiles) != len(args.inputfile_types): 111 name = 'other'
77 raise ValueError('Inconsistent inputfiles and inputfile_types:\n'+ 112 filepath = dataset['filepath']
78 f'inputfiles ({type(inputfiles)}):\n{inputfiles}\n'+ 113 print(f'element_identifier = {element_identifier} {len(element_identifier)}')
79 f'inputfile_types ({type(inputfile_types)}):\n{inputfile_types}') 114 print(f'name = {name}')
80 input_as_collection = False 115 print(f'filepath = {filepath}')
81 else: 116 if not len(collections):
82 raise ValueError(f'Invalid args.inputfile_types: {args.inputfile_types} '+ 117 collections = [{'name' : name, 'filepaths' : [filepath]}]
83 f'{type(args.inputfile_types)}') 118 else:
84 119 collection = [c for c in collections if c['name'] == name]
85 datasets = [] 120 if len(collection):
86 collections = [] 121 collection[0]['filepaths'].append(filepath)
87 if input_as_collection:
88 # Read input file collections and collect data files info
89 with open(args.inputfiles[0]) as cf:
90 for line in cf:
91 if not line.strip() or line.startswith('#'):
92 continue
93 fields = [x.strip() for x in line.split('\t')]
94 filepath = fields[0]
95 element_identifier = fields[1] if len(fields) > 1 else fields[0].split('/')[-1]
96 datasets.append({'element_identifier' : fields[1], 'filepath' : filepath})
97 logging.debug(f'datasets:\n{datasets}')
98
99 # Read and sort data files
100 for dataset in datasets:
101 element_identifier = [x.strip() for x in dataset['element_identifier'].split('_')]
102 if len(element_identifier) > 1:
103 name = element_identifier[0]
104 else: 122 else:
105 name = 'other' 123 collection = {'name' : name, 'filepaths' : [filepath]}
106 filepath = dataset['filepath'] 124 collections.append(collection)
107 if not len(collections):
108 collections = [{'name' : name, 'filepaths' : [filepath]}]
109 else:
110 collection = [c for c in collections if c['name'] == name]
111 if len(collection):
112 collection[0]['filepaths'].append(filepath)
113 else:
114 collection = {'name' : name, 'filepaths' : [filepath]}
115 collections.append(collection)
116 else:
117 # Collect input file datasets info
118 collections = [{'name' : filetype, 'filepaths' : [filepath]}
119 for filetype, filepath in zip(args.inputfile_types, args.inputfiles)]
120 logging.debug(f'collections:\n{collections}') 125 logging.debug(f'collections:\n{collections}')
121 if len(args.tomo_ranges) != 2*len(collections): 126 print(f'collections:\n{collections}')
122 raise ValueError('Inconsistent tomo ranges size.') 127 return
123 128
124 # Instantiate Tomo object 129 # Instantiate Tomo object
125 tomo = Tomo(config_file=args.config, config_out=args.output_config, log_level=log_level, 130 tomo = Tomo(config_file=args.config, config_out=args.output_config, log_level=log_level,
126 log_stream=args.log, galaxy_flag=True) 131 log_stream=args.log, galaxy_flag=True)
127 if not tomo.is_valid: 132 if not tomo.is_valid:
169 raise ValueError('Inconsistent number of tomography data image sets') 174 raise ValueError('Inconsistent number of tomography data image sets')
170 tomo_stack_files = [] 175 tomo_stack_files = []
171 for stack in stack_info['stacks']: 176 for stack in stack_info['stacks']:
172 stack['img_offset'] = args.tomo_ranges[2*num_collections] 177 stack['img_offset'] = args.tomo_ranges[2*num_collections]
173 stack['num'] = args.tomo_ranges[2*num_collections+1] 178 stack['num'] = args.tomo_ranges[2*num_collections+1]
174 if input_as_collection: 179 tomo_files = [c['filepaths'] for c in collections if c['name'] == f'set{stack["index"]}']
175 tomo_files = [c['filepaths'] for c in collections
176 if c['name'] == f'set{stack["index"]}']
177 else:
178 assert(collections[num_collections]['name'] == 'data')
179 tomo_files = [collections[num_collections]['filepaths']]
180 if len(tomo_files) != 1 or len(tomo_files[0]) < 1: 180 if len(tomo_files) != 1 or len(tomo_files[0]) < 1:
181 exit(f'Unable to obtain tomography images for set {stack["index"]}') 181 exit(f'Unable to obtain tomography images for set {stack["index"]}')
182 tomo_stack_files.append(tomo_files[0]) 182 tomo_stack_files.append(tomo_files[0])
183 num_collections += 1 183 num_collections += 1
184 184