diff tomo_setup.py @ 49:26f99fdd8d61 draft

"planemo upload for repository https://github.com/rolfverberg/galaxytools commit 4f7738d02f4a3fd91373f43937ed311b6fe11a12"
author rv43
date Thu, 28 Jul 2022 16:05:24 +0000
parents ef5c2f7b49ec
children 79c216516ef9
line wrap: on
line diff
--- a/tomo_setup.py	Wed Apr 27 17:28:26 2022 +0000
+++ b/tomo_setup.py	Thu Jul 28 16:05:24 2022 +0000
@@ -11,7 +11,6 @@
 import tracemalloc
 
 from tomo import Tomo
-import msnc_tools as msnc
 
 #from memory_profiler import profile
 #@profile
@@ -21,20 +20,25 @@
     parser = argparse.ArgumentParser(
             description='Setup tomography reconstruction')
     parser.add_argument('-i', '--inputfiles',
+            nargs='+',
             default='inputfiles.txt',
-            help='Input file collections')
+            help='Input file datasets or collections')
     parser.add_argument('-c', '--config',
             help='Input config')
+    parser.add_argument('-l', '--log', 
+            type=argparse.FileType('w'),
+            default=sys.stdout,
+            help='Log file')
+    parser.add_argument('-t', '--inputfile_types',
+            nargs='+',
+            default='collection',
+            help='Input files type (collection or a list of set types: dark, bright, or data)')
     parser.add_argument('--theta_range',
             help='Theta range (lower bound, upper bound, number of angles)')
     parser.add_argument('--output_config',
             help='Output config')
     parser.add_argument('--output_data',
             help='Preprocessed tomography data')
-    parser.add_argument('-l', '--log', 
-            type=argparse.FileType('w'),
-            default=sys.stdout,
-            help='Log file')
     parser.add_argument('tomo_ranges', metavar='N', type=int, nargs='+')
     args = parser.parse_args()
 
@@ -51,43 +55,59 @@
             handlers=[logging.StreamHandler()])
 
     logging.debug(f'config = {args.config}')
+    logging.debug(f'inputfiles = {args.inputfiles}')
+    logging.debug(f'inputfile_types = {args.inputfile_types}')
+    logging.debug(f'log = {args.log}')
+    logging.debug(f'is log stdout? {args.log is sys.stdout}')
     logging.debug(f'theta_range = {args.theta_range.split()}')
     logging.debug(f'output_config = {args.output_config}')
     logging.debug(f'output_data = {args.output_data}')
-    logging.debug(f'log = {args.log}')
-    logging.debug(f'is log stdout? {args.log is sys.stdout}')
     logging.debug(f'tomoranges = {args.tomo_ranges}')
 
-    # Read input files and collect data files info
+    # Check input file type
+    if isinstance(args.inputfile_types, str) and args.inputfile_types == 'collection':
+        input_as_collection = True
+    elif isinstance(args.inputfile_types, list):
+        input_as_collection = False
+    else:
+        raise ValueError(f'Invalid args.inputfile_types: {args.inputfile_types} '+
+                f'{type(args.inputfile_types)}')
+
     datasets = []
-    with open(args.inputfiles) as cf:
-        for line in cf:
-            if not line.strip() or line.startswith('#'):
-                continue
-            fields = [x.strip() for x in line.split('\t')]
-            filepath = fields[0]
-            element_identifier = fields[1] if len(fields) > 1 else fields[0].split('/')[-1]
-            datasets.append({'element_identifier' : fields[1], 'filepath' : filepath})
-    logging.debug(f'datasets:\n{datasets}')
-
-    # Read and sort data files
     collections = []
-    for dataset in datasets:
-        element_identifier = [x.strip() for x in dataset['element_identifier'].split('_')]
-        if len(element_identifier) > 1:
-            name = element_identifier[0]
-        else:
-            name = 'other'
-        filepath = dataset['filepath']
-        if not len(collections):
-            collections = [{'name' : name, 'filepaths' : [filepath]}]
-        else:
-            collection = [c for c in collections if c['name'] == name]
-            if len(collection):
-                collection[0]['filepaths'].append(filepath)
+    if input_as_collection:
+        # Read input file collections and collect data files info
+        with open(args.inputfiles) as cf:
+            for line in cf:
+                if not line.strip() or line.startswith('#'):
+                    continue
+                fields = [x.strip() for x in line.split('\t')]
+                filepath = fields[0]
+                element_identifier = fields[1] if len(fields) > 1 else fields[0].split('/')[-1]
+                datasets.append({'element_identifier' : fields[1], 'filepath' : filepath})
+        logging.debug(f'datasets:\n{datasets}')
+
+        # Read and sort data files
+        for dataset in datasets:
+            element_identifier = [x.strip() for x in dataset['element_identifier'].split('_')]
+            if len(element_identifier) > 1:
+                name = element_identifier[0]
             else:
-                collection = {'name' : name, 'filepaths' : [filepath]}
-                collections.append(collection)
+                name = 'other'
+            filepath = dataset['filepath']
+            if not len(collections):
+                collections = [{'name' : name, 'filepaths' : [filepath]}]
+            else:
+                collection = [c for c in collections if c['name'] == name]
+                if len(collection):
+                    collection[0]['filepaths'].append(filepath)
+                else:
+                    collection = {'name' : name, 'filepaths' : [filepath]}
+                    collections.append(collection)
+    else:
+        # Collect input file datasets info
+        collections = [{'name' : filetype, 'filepaths' : [filepath]} 
+                for filetype, filepath in zip(args.inputfile_types, args.inputfiles)]
     logging.debug(f'collections:\n{collections}')
     if len(args.tomo_ranges) != 2*len(collections):
         raise ValueError('Inconsistent tomo ranges size.')
@@ -142,7 +162,12 @@
     for stack in stack_info['stacks']:
         stack['img_offset'] = args.tomo_ranges[2*num_collections]
         stack['num'] = args.tomo_ranges[2*num_collections+1]
-        tomo_files = [c['filepaths'] for c in collections if c['name'] == f'set{stack["index"]}']
+        if input_as_collection:
+            tomo_files = [c['filepaths'] for c in collections
+                if c['name'] == f'set{stack["index"]}']
+        else:
+            assert(collections[num_collections]['name'] == 'data')
+            tomo_files = [collections[num_collections]['filepaths']]
         if len(tomo_files) != 1 or len(tomo_files[0]) < 1:
             exit(f'Unable to obtain tomography images for set {stack["index"]}')
         tomo_stack_files.append(tomo_files[0])