Mercurial > repos > bgruening > json2yolosegment
annotate preprocessing.py @ 4:f6990d85161c draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit c6c9d43a4ecdc88ebdeaf3451453a550f159c506
author | bgruening |
---|---|
date | Mon, 21 Jul 2025 15:51:13 +0000 |
parents | 97bc82ee2a61 |
children |
rev | line source |
---|---|
0
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
1 import argparse |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
2 import os |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
3 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
4 from sklearn.model_selection import train_test_split |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
5 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
6 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
def get_basename(f):
    """Return the file name of ``f`` without its directory or last extension.

    Only the final suffix is removed, so ``"dir/scan.tar.gz"`` gives
    ``"scan.tar"``.
    """
    _directory, filename = os.path.split(f)
    stem, _extension = os.path.splitext(filename)
    return stem
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
9 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
10 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
def pair_files(images_dir, labels_dir):
    """Pair image and label files that share the same base name.

    The base name is whatever ``get_basename`` returns for the entry, i.e.
    the file name without directory and final extension.

    Args:
        images_dir: Directory whose entries are treated as images.
        labels_dir: Directory whose entries are treated as labels.

    Returns:
        A list of ``(image_name, label_name)`` tuples, ordered by base name.
        Entries present in only one of the two directories are left out.
        Names are relative to their directory, exactly as listed.
    """
    # os.listdir() gives entries in arbitrary order. Sorting first makes the
    # surviving entry deterministic when two files share a base name
    # (for example "a.png" and "a.jpg"): the last one in sorted order wins.
    image_dict = {get_basename(f): f for f in sorted(os.listdir(images_dir))}
    label_dict = {get_basename(f): f for f in sorted(os.listdir(labels_dir))}

    # Sorted keys keep the output order stable from run to run.
    keys = sorted(image_dict.keys() & label_dict.keys())

    return [(image_dict[k], label_dict[k]) for k in keys]
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
22 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
23 |
3
97bc82ee2a61
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 743c8acf1ea4e4b1e718743d3772b7e592646611
bgruening
parents:
0
diff
changeset
|
def copy_file(src, dst):
    """Copy the bytes of ``src`` into ``dst`` in 8192-byte chunks.

    ``dst`` is opened in ``'wb'`` mode, so it is created or truncated.
    Only the file contents are written; nothing here touches permissions
    or timestamps.
    """
    chunk_size = 8192
    with open(src, 'rb') as reader, open(dst, 'wb') as writer:
        # iter() with a sentinel stops at the empty bytes object read at EOF.
        for block in iter(lambda: reader.read(chunk_size), b''):
            writer.write(block)
97bc82ee2a61
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 743c8acf1ea4e4b1e718743d3772b7e592646611
bgruening
parents:
0
diff
changeset
|
31 |
97bc82ee2a61
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 743c8acf1ea4e4b1e718743d3772b7e592646611
bgruening
parents:
0
diff
changeset
|
32 |
0
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
def copy_pairs(pairs, image_src, label_src, image_dst, label_dst):
    """Copy every image/label pair into the given destination directories.

    Args:
        pairs: Iterable of ``(image_name, label_name)`` file names.
        image_src: Directory the image names are relative to.
        label_src: Directory the label names are relative to.
        image_dst: Directory that receives the images; created if missing.
        label_dst: Directory that receives the labels; created if missing.
    """
    # Both destinations are created up front, even when ``pairs`` is empty.
    for target_dir in (image_dst, label_dst):
        os.makedirs(target_dir, exist_ok=True)

    for image_name, label_name in pairs:
        copy_file(
            os.path.join(image_src, image_name),
            os.path.join(image_dst, image_name),
        )
        copy_file(
            os.path.join(label_src, label_name),
            os.path.join(label_dst, label_name),
        )
0
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
39 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
40 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
def write_yolo_yaml(output_dir):
    """Write the ``yolo.yml`` description file into ``output_dir``.

    The file records ``output_dir`` under ``path``, the ``train``, ``valid``
    and ``test`` sub-directory names under ``train``/``val``/``test``, and a
    ``names`` list holding the single entry ``'dataset'``. ``output_dir``
    must already exist.
    """
    config_lines = [
        f"path: {output_dir}\n",
        "train: train\n",
        "val: valid\n",
        "test: test\n",
        "\n",
        "names: ['dataset']\n",
    ]
    with open(os.path.join(output_dir, "yolo.yml"), 'w') as handle:
        handle.writelines(config_lines)
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
51 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
52 |
252fd085940d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools commit 67e0e1d123bcfffb10bab8cc04ae67259caec557
bgruening
parents:
diff
changeset
|
def main():
    """Split paired image/label files into train, valid and test folders.

    Command-line options:
        -i/--images         directory holding the image files (required)
        -y/--labels         directory holding the label files (required)
        -o/--output         directory that receives the split data (required)
        -p/--train_percent  integer percentage of pairs used for training
                            (default 70)

    Pairs not used for training are divided evenly between the validation
    and test sets. Both splits use a fixed ``random_state``. Each subset is
    copied to ``<output>/<subset>/images`` and ``<output>/<subset>/labels``,
    and ``yolo.yml`` is then written to the output directory.

    Exits with a usage error when ``--train_percent`` is not strictly
    between 0 and 100, or when no image/label pair is found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--images", required=True)
    parser.add_argument("-y", "--labels", required=True)
    parser.add_argument("-o", "--output", required=True)
    parser.add_argument("-p", "--train_percent", type=int, default=70)
    args = parser.parse_args()

    # A percentage of 0 or 100 (or beyond) yields a test_size outside the
    # open interval (0, 1), which train_test_split rejects. Fail early with
    # a readable message instead of a traceback.
    if not 0 < args.train_percent < 100:
        parser.error("--train_percent must be an integer between 1 and 99")

    all_pairs = pair_files(args.images, args.labels)
    if not all_pairs:
        parser.error("no image/label files with matching base names were found")

    train_size = args.train_percent / 100.0
    val_test_size = 1.0 - train_size

    train_pairs, val_test_pairs = train_test_split(all_pairs, test_size=val_test_size, random_state=42)
    # NOTE(review): very small data sets may still be rejected by
    # train_test_split if a split would end up empty; confirm the minimum.
    val_pairs, test_pairs = train_test_split(val_test_pairs, test_size=0.5, random_state=42)

    subsets = (("train", train_pairs), ("valid", val_pairs), ("test", test_pairs))
    for subset_name, subset_pairs in subsets:
        copy_pairs(
            subset_pairs,
            args.images,
            args.labels,
            os.path.join(args.output, f"{subset_name}/images"),
            os.path.join(args.output, f"{subset_name}/labels"),
        )

    write_yolo_yaml(args.output)


if __name__ == "__main__":
    main()