Mercurial > repos > bgruening > yolo_predict

--- a/macros.xml	Mon Jul 14 18:28:13 2025 +0000
+++ b/macros.xml	Mon Jul 21 15:50:37 2025 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">8.3.0</token>
-    <token name="@VERSION_SUFFIX@">3</token>
+    <token name="@VERSION_SUFFIX@">4</token>
     <xml name="creator">
         <creator>
             <person name="Yi Sun" email="sunyi000@gmail.com" />
Binary file test-data/results_plot.png has changed
Binary file test-data/results_plot1.png has changed
--- a/yolo_prediction.xml	Mon Jul 14 18:28:13 2025 +0000
+++ b/yolo_prediction.xml	Mon Jul 21 15:50:37 2025 +0000
@@ -13,7 +13,7 @@
       mkdir -p ./input_images ./runs ./models ./results &&

       #for $filename in $input_images:
-          ln -s '$filename' './input_images/${filename.element_identifier}' &&
+          ln -s '$filename' './input_images/${filename.element_identifier}.${filename.ext}' &&
       #end for

       cp '$class_name' './models/class_name.txt' &&
@@ -52,46 +52,18 @@
     </inputs>
     <outputs>
         <collection name="txt_results" format="txt" type="list" label="YOLO bounding box and annotation (text)">
-            <discover_datasets pattern="(?P&lt;name&gt;.*)\.txt$" directory="results" visible="true"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt$" directory="results" />
         </collection>
         <collection name="mask_results" format="tiff" type="list" label="YOLO segmentation masks (TIFF)">
-            <discover_datasets pattern="(?P&lt;name&gt;.*)_mask\.tiff$" directory="results" visible="true"/>
+            <filter>mode == 'segment'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_mask\.tiff$" directory="results" />
         </collection>
         <collection name="overlay_results" format="jpg" type="list" label="YOLO annotated images">
-            <discover_datasets pattern="(?P&lt;name&gt;.*)\.jpg$" directory="runs/overlaid_images" visible="true"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.jpg$" directory="runs/overlaid_images" />
         </collection>
     </outputs>
     <tests>
-    <!--tests from first version-->
-        <test>
-        <param name="input_images" value="pred-test01.jpg" />
-        <param name="model" location="https://zenodo.org/records/15611468/files/best.pt" />
-        <param name="image_size" value="512" />
-        <param name="confidence" value="0.003" />
-        <param name="iou" value="0.7" />
-        <param name="max_det" value="100" />
-        <param name="mode" value="segment" />
-        <param name="class_name" value="class_name.txt" />
-
-        <output_collection name="txt_results" type="list" count="1">
-            <element name="pred-test01">
-                <assert_contents>
-                    <has_n_lines n="100"/>
-                </assert_contents>
-            </element>
-        </output_collection>
-
-        <output_collection name="mask_results" type="list" count="1">
-            <element name="pred-test01">
-                <assert_contents>
-                    <has_image_width width="512"/>
-                    <has_image_height height="1024"/>
-                    <has_image_channels channels="1"/>
-                </assert_contents>
-            </element>
-        </output_collection>
-    </test>
-    <test>
+    <test expect_num_outputs="3">
             <param name="input_images" value="pred-test01.jpg" />
             <param name="model" location="https://zenodo.org/records/15611468/files/best.pt" />
             <param name="image_size" value="512" />
@@ -102,7 +74,7 @@
             <param name="class_name" value="class_name.txt" />

             <output_collection name="txt_results" type="list" count="1">
-                <element name="pred-test01">
+                <element name="pred-test01.jpg">
                     <assert_contents>
                         <has_n_lines n="100"/>
                     </assert_contents>
@@ -110,18 +82,29 @@
             </output_collection>

             <output_collection name="mask_results" type="list" count="1">
-                <element name="pred-test01">
+                <element name="pred-test01.jpg">
                     <assert_contents>
                         <has_image_width width="512"/>
                         <has_image_height height="1024"/>
                         <has_image_channels channels="1"/>
                     </assert_contents>
                 </element>
+	    </output_collection>
+            <output_collection name="overlay_results" type="list" count="1">
+	        <element name="pred-test01.jpg">
+		    <assert_contents>
+                        <has_image_width width="512"/>
+                        <has_image_height height="1024"/>
+                        <has_image_channels channels="3"/>
+                    </assert_contents>
+		</element>
             </output_collection>
+
         </test>
         <!--new version's test-->
-        <!-- SEGMENT MODE TEST -->
-        <test>
+	<!-- SEGMENT MODE TEST -->
+
+        <test expect_num_outputs="3">
             <param name="input_images" value="bus.jpg" />
             <param name="class_name" value="yolo-test-classes.txt" />
             <param name="model" location="https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt" />
@@ -131,18 +114,19 @@
             <param name="iou" value="0.45" />
             <param name="max_det" value="300" />
             <output_collection name="txt_results" type="list" count="1">
-                <element name="bus" file="bus.txt" compare="sim_size" />
+                <element name="bus.jpg" file="bus.txt" compare="sim_size" />
             </output_collection>
             <output_collection name="mask_results" type="list" count="1">
-                <element name="bus" file="bus_mask.tiff" compare="sim_size" />
+                <element name="bus.jpg" file="bus_mask.tiff" compare="sim_size" />
             </output_collection>
             <output_collection name="overlay_results" type="list" count="1">
-                <element name="bus" file="bus_seg.jpg" compare="sim_size" />
+                <element name="bus.jpg" file="bus_seg.jpg" compare="sim_size" />
             </output_collection>
         </test>

-        <!-- DETECT MODE TEST -->
-        <test>
+	<!-- DETECT MODE TEST -->
+
+        <test expect_num_outputs="2">
             <param name="input_images" value="bus.jpg" />
             <param name="class_name" value="yolo-test-classes.txt" />
             <param name="model" location="https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt" />
@@ -152,12 +136,13 @@
             <param name="iou" value="0.45" />
             <param name="max_det" value="300" />
             <output_collection name="txt_results" type="list" count="1">
-                <element name="bus" file="bus_detect.txt" compare="sim_size" />
+                <element name="bus.jpg" file="bus_detect.txt" compare="sim_size" />
             </output_collection>
             <output_collection name="overlay_results" type="list" count="1">
-                <element name="bus" file="bus_seg.jpg" compare="sim_size" />
+                <element name="bus.jpg" file="bus_seg.jpg" compare="sim_size" />
             </output_collection>
-        </test>
+	</test>
+
     </tests>

     <help><![CDATA[
@@ -171,9 +156,9 @@

     Choose from two available options using the **mode** parameter:

-    - **detect** – Use this if your pre-trained model is a detection model. It outputs bounding boxes, class IDs, and confidence scores for objects detected in each image.
+    - **detect** – Use this if your pre-trained or custom-trained model is a detection model. It outputs bounding-box coordinates, class IDs, and confidence scores, plus overlay images with the bounding boxes drawn.

-    - **segment** – Use this if your pre-trained model is a segmentation model. It outputs both bounding boxes **and** pixel-level segmentation masks. Additional output files include grayscale TIFF masks.
+    - **segment** – Use this if your pre-trained or custom-trained model is a segmentation model. It outputs a text file containing polygon coordinates, class IDs, and confidence scores; overlaid images with bounding boxes; and mask images.

     ⚠️ **Important:** The selected mode must match the type of the model you upload. For example, if you are using a segmentation model (`*-seg.pt`), you must select `segment` mode. Using a mismatched mode and model will lead to errors or empty results.

@@ -196,13 +181,13 @@

     - **YOLO Annotations (.txt)**:
     - One file per input image.
-    - Contains predicted class ID, confidence score, and bounding box (and segmentation data if applicable).
+    - Contains bounding-box/polygon coordinates, predicted class IDs, and confidence scores.

     - **Segmentation Masks (.tiff)** (for `segment` mode only):
-    - Grayscale images showing detected object areas.
+    - Label images: the area of each detected object is filled with its own instance ID (background is 0).

     - **Images with annotations(.jpg)**:
-    - Input images with bounding boxes and only for segment mode the overlayer.
+    - Overlaid images with bounding boxes and/or segmentation overlay.

     ]]></help>
     <expand macro="citations" />
--- a/yolov8.py	Mon Jul 14 18:28:13 2025 +0000
+++ b/yolov8.py	Mon Jul 21 15:50:37 2025 +0000
@@ -1,4 +1,5 @@
 import argparse
+import csv
 import os
 import pathlib
 import time
@@ -11,7 +12,6 @@
 from tifffile import imwrite
 from ultralytics import YOLO

-
 #
 # Input arguments
 #
@@ -79,7 +79,6 @@
 parser.add_argument("--class_names_file",
                     help="Path to the text file containing class names.",
                     type=str)
-
 # For training the model and prediction
 parser.add_argument("--mode",
                     help=(
@@ -130,6 +129,7 @@
 parser.add_argument('--headless', action='store_true')
 parser.add_argument('--nextflow', action='store_true')

+
 # For data augmentation
 parser.add_argument("--hsv_h",
                     help="(float) image HSV-Hue augmentation (fraction)",
@@ -171,9 +171,6 @@
                     default=1.0, type=float)


-#
-# Functions
-#
 # Train a new model on the dataset mentioned in yaml file
 def trainModel(model_path, model_name, yaml_filepath, **kwargs):
     if "imgsz" in kwargs:
@@ -272,13 +269,12 @@
                 translate=aug_translate, shear=aug_shear, scale=aug_scale,
                 perspective=aug_perspective, fliplr=aug_fliplr,
                 flipud=aug_flipud, mosaic=aug_mosaic, crop_fraction=aug_crop_fraction,
-                weight_decay=weight_decay, lr0=init_lr, seed=42)
+                weight_decay=weight_decay, lr0=init_lr)
     return model


 # Validate the trained model
 def validateModel(model):
-    # Validate the model
     metrics = model.val()  # no args needed, dataset & settings remembered
     metrics.box.map    # map50-95
     metrics.box.map50  # map50
@@ -316,6 +312,7 @@
     run_save_dir = kwargs['run_dir']  # For Galaxy, run_save_dir is always provided via xml wrapper
     if "foldername" in kwargs:
         save_folder_name = kwargs['foldername']
+
     # infer on a local image or directory containing images/videos
     prediction = model.predict(source=source_datapath, save=True, stream=True,
                                conf=confidence, imgsz=image_size,
@@ -329,6 +326,7 @@
 def save_yolo_bounding_boxes_to_txt(predictions, save_dir):
     """
     Function to save YOLO bounding boxes to text files.
+
     Parameters:
     - predictions: List of results from YOLO model inference.
     - save_dir: Directory where the text files will be saved.
@@ -339,12 +337,15 @@
         bounding_boxes = result.boxes.xyxy  # Bounding boxes in xyxy format
         confidence_scores = result.boxes.conf  # Confidence scores
         class_nums = result.boxes.cls  # Class numbers
+
         # Create save directory if it doesn't exist
         save_path = pathlib.Path(save_dir).absolute()
         save_path.mkdir(parents=True, exist_ok=True)
+
         # Construct filename for the text file
         image_filename = pathlib.Path(result.path).stem
         text_filename = save_path / f"{image_filename}.txt"
+
         # Write bounding boxes info into the text file
         with open(text_filename, 'w') as f:
             for i in range(bounding_boxes.shape[0]):
@@ -352,12 +353,14 @@
                 confidence = confidence_scores[i]
                 class_num = int(class_nums[i])
                 f.write(f'{class_num:01} {x1:06.2f} {y1:06.2f} {x2:06.2f} {y2:06.2f} {confidence:0.02} \n')
-        print(colored(f"Bounding boxes saved in: {text_filename}", 'green'))
+            print(colored(f"Bounding boxes saved in: {text_filename}", 'green'))


+# Main code
 if __name__ == '__main__':
     args = parser.parse_args()
     os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
+
     # Train/load model
     if (args.train):
         model = trainModel(args.model_path, args.model_name, args.yaml_path,
@@ -377,7 +380,7 @@
                                       "train", "weights", "best.pt"))
         else:
             model = YOLO(os.path.join(args.model_path,
-                                      args.model_name + ".pt"))
+                         args.model_name + ".pt"))
         model.info(verbose=True)
         elapsed = time.time() - t
         print(colored(f"\nYOLO model loaded in : '{elapsed}' sec \n", 'white', 'on_yellow'))
@@ -422,76 +425,107 @@
                                   tracker=args.tracker_file,
                                   conf=args.confidence,
                                   iou=args.iou,
-                                  persist=False,
-                                  show=True,
+                                  persist=True,
+                                  show=False,
                                   save=True,
                                   project=args.run_dir,
                                   name=args.foldername)
             # Store the track history
             track_history = defaultdict(lambda: [])

-            for result in results:
-                # Get the boxes and track IDs
-                if result.boxes and result.boxes.is_track:
-                    boxes = result.boxes.xywh.cpu()
-                    track_ids = result.boxes.id.int().cpu().tolist()
-                    # Visualize the result on the frame
-                    frame = result.plot()
-                    # Plot the tracks
-                    for box, track_id in zip(boxes, track_ids):
-                        x, y, w, h = box
-                        track = track_history[track_id]
-                        track.append((float(x), float(y)))  # x, y center point
-                        if len(track) > 30:  # retain 30 tracks for 30 frames
-                            track.pop(0)
-
-                        # Draw the tracking lines
-                        points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
-                        cv2.polylines(frame, [points], isClosed=False, color=(230, 230, 230), thickness=2)
-
-                    # Display the annotated frame
-                    cv2.imshow("YOLO11 Tracking", frame)
-                    print(colored(f"Tracking results saved in : '{args.save_dir}' \n", 'green'))
+            tsv_path = os.path.join(args.save_dir, "tracks.tsv")
+            with open(tsv_path, "w", newline="") as tsvfile:
+                writer = csv.writer(tsvfile, delimiter='\t')
+                writer.writerow(['track_id', 'frame', 'class', 'centroid_x', 'centroid_y'])
+                frame_idx = 0
+                # Created before the loop so it exists even when the first frame has no tracks
+                mask_stack = []
+                for result in results:
+                    # Get the boxes and track IDs
+                    if result.boxes and result.boxes.is_track:
+                        track_ids = result.boxes.id.int().cpu().tolist()
+                        labels = result.boxes.cls.int().cpu().tolist() if hasattr(result.boxes, "cls") else [0] * len(track_ids)
+                        # Prepare mask image
+                        img_shape = result.orig_shape if hasattr(result, "orig_shape") else result.orig_img.shape
+                        mask = np.zeros(img_shape[:2], dtype=np.uint16)
+                        # Check if polygons (masks) are available
+                        if hasattr(result, "masks") and result.masks is not None and hasattr(result.masks, "xy"):
+                            polygons = result.masks.xy
+                            for i, (track_id, label) in enumerate(zip(track_ids, labels)):
+                                if i < len(polygons):
+                                    contour = polygons[i].astype(np.int32)
+                                    contour = contour.reshape(-1, 1, 2)
+                                    cv2.drawContours(mask, [contour], -1, int(track_id), cv2.FILLED)
+                                    # Calculate centroid of the polygon
+                                    M = cv2.moments(contour)
+                                    if M["m00"] != 0:
+                                        cx = float(M["m10"] / M["m00"])
+                                        cy = float(M["m01"] / M["m00"])
+                                    else:
+                                        cx, cy = 0.0, 0.0
+                                    writer.writerow([track_id, frame_idx, label, cx, cy])
+                        else:
+                            # Fallback to bounding boxes if polygons are not available
+                            boxes = result.boxes.xywh.cpu()
+                            xyxy_boxes = result.boxes.xyxy.cpu().numpy()
+                            for i, (box, xyxy, track_id, label) in enumerate(zip(boxes, xyxy_boxes, track_ids, labels)):
+                                x, y, w, h = box
+                                writer.writerow([track_id, frame_idx, label, float(x), float(y)])
+                                x1, y1, x2, y2 = map(int, xyxy)
+                                cv2.rectangle(mask, (x1, y1), (x2, y2), int(track_id), thickness=-1)
+                        # Collect masks for TYX stack (only frames that contain tracks contribute)
+                        mask_stack.append(mask)
+                    frame_idx += 1
+            # Save TYX stack (T=frames, Y, X)
+            if mask_stack:
+                tyx_array = np.stack(mask_stack, axis=0)
+                # Remove string from last underscore in filename
+                stem = pathlib.Path(result.path).stem
+                stem = stem.rsplit('_', 1)[0] if '_' in stem else stem
+                mask_save_as = str(pathlib.Path(os.path.join(args.save_dir, stem + "_mask.tiff")).absolute())
+                imwrite(mask_save_as, tyx_array)
+                print(colored(f"TYX mask stack saved as : '{mask_save_as}'", 'magenta'))
+            print(colored(f"Tracking results saved in : '{args.save_dir}' \n", 'green'))
         elif (args.mode == "segment"):
             # Read class names from the file
             with open(args.class_names_file, 'r') as f:
                 class_names = [line.strip() for line in f.readlines()]
+            # Create a mapping from class names to indices
             class_to_index = {class_name: i for i, class_name in enumerate(class_names)}

             # Save polygon coordinates
             for result in predictions:
+                # Create binary mask
                 img = np.copy(result.orig_img)
                 filename = pathlib.Path(result.path).stem
                 b_mask = np.zeros(img.shape[:2], np.uint8)
                 mask_save_as = str(pathlib.Path(os.path.join(args.save_dir, filename + "_mask.tiff")).absolute())
+                # Define output file path for text file
                 txt_save_as = str(pathlib.Path(os.path.join(args.save_dir, filename + ".txt")).absolute())
-
+                instance_id = 1  # Start instance IDs from 1
                 for c, ci in enumerate(result):
-                    if ci.masks is not None and ci.masks.xy:
-                        #  Extract contour
-                        contour = ci.masks.xy.pop()
-                        contour = contour.astype(np.int32).reshape(-1, 1, 2)
-                        _ = cv2.drawContours(b_mask, [contour], -1, (255, 255, 255), cv2.FILLED)
-
-                        # Normalized polygon points
-                        points = ci.masks.xyn.pop()
-                        obj_class = int(ci.boxes.cls.to("cpu").numpy().item())
-                        confidence = result.boxes.conf.to("cpu").numpy()[c]
+                    if ci.masks is None or not ci.masks.xy:
+                        continue  # no mask for this detection: nothing to draw or write
+                    contour = ci.masks.xy.pop()  # extract contour result
+                    contour = contour.astype(np.int32)
+                    contour = contour.reshape(-1, 1, 2)
+                    # Draw contour onto mask with unique instance id
+                    _ = cv2.drawContours(b_mask, [contour], -1, instance_id, cv2.FILLED)

-                        with open(txt_save_as, 'a') as f:
-                            segmentation_points = ['{} {}'.format(points[i][0], points[i][1]) for i in range(len(points))]
-                            segmentation_points_string = ' '.join(segmentation_points)
-                            line = '{} {} {}\n'.format(obj_class, segmentation_points_string, confidence)
-                            f.write(line)
-                    else:
-                        print(colored(f"⚠️ No mask found for object {c} in '{filename}'. Skipping.", "yellow"))
+                    # Normalized polygon points
+                    points = ci.masks.xyn.pop()
+                    confidence = result.boxes.conf.to("cpu").numpy()[c]

-                # Overlay mask onto original image
-                colored_mask = cv2.merge([b_mask, np.zeros_like(b_mask), np.zeros_like(b_mask)])
-                blended = cv2.addWeighted(img, 1.0, colored_mask, 0.5, 0)
-                overlay_path = os.path.join(args.save_dir, filename + "_overlay.jpg")
-                cv2.imwrite(overlay_path, blended)
+                    with open(txt_save_as, 'a') as f:
+                        segmentation_points = ['{} {}'.format(points[i][0], points[i][1]) for i in range(len(points))]
+                        segmentation_points_string = ' '.join(segmentation_points)
+                        line = '{} {} {}\n'.format(instance_id, segmentation_points_string, confidence)
+                        f.write(line)

-                imwrite(mask_save_as, b_mask, imagej=True)
-                print(colored(f"Saved binary mask as : \n '{mask_save_as}' \n", 'magenta'))
+                    instance_id += 1  # Increment for next object
+
+                imwrite(mask_save_as, b_mask, imagej=True)  # save label mask image
+                print(colored(f"Saved label mask as : \n '{mask_save_as}' \n", 'magenta'))
                 print(colored(f"Polygon coordinates saved as : \n '{txt_save_as}' \n", 'cyan'))
+        else:
+            raise Exception(("Currently only 'detect', 'segment' and 'track' modes are available"))