Anykaka 1 年之前
父节点
当前提交
6f8ab63a1c

+ 17 - 0
.gitignore

@@ -0,0 +1,17 @@
+# -----> pictures
+*.jpg
+*.jpeg
+*.png
+
+# -----> labels
+*.xml
+*.txt
+*.json
+*.pt
+*.onnx
+
+# -----> archives, IDE settings and training outputs
+*.zip
+.idea
+runs/*
+*/runs/

+ 33 - 0
main.py

@@ -0,0 +1,33 @@
+"""Print the installed PyTorch / torchvision versions and CUDA availability.
+
+Quick sanity check of the deep-learning environment before running the YOLOv8
+command-line examples listed at the bottom of this file.
+"""
+import torch
+import torchvision
+
+print(torch.__version__)
+print(torch.version.cuda)
+
+print(torchvision.__version__)
+# torchvision's generated version module only defines ``cuda`` for builds with
+# CUDA support, so fall back to None on CPU-only builds instead of raising
+# AttributeError.
+print(getattr(torchvision.version, 'cuda', None))
+
+print(torch.cuda.is_available())
+
+
+# Prediction example:
+# yolo task=detect mode=predict model=yolov8n.pt source=run-people.jpg imgsz=640 show=True save=True
+
+# Training example:
+# yolo task=segment mode=train model=tof3D/yolov8-seg.yaml data=tof3D/data.yaml epochs=100 batch=4
+
+

+ 8 - 0
tof3D/data.yaml

@@ -0,0 +1,8 @@
+train: "D:/DeepLearning/pytorch-gpu117/tof3D/dataSet/train.txt"
+val: "D:/DeepLearning/pytorch-gpu117/tof3D/dataSet/val.txt"
+
+names:
+  0: car
+  1: wheel
+
+

+ 478 - 0
tof3D/label_converter.py

@@ -0,0 +1,478 @@
+import argparse
+import json
+import os
+import time
+
+from PIL import Image
+from tqdm import tqdm
+from datetime import date
+
+import numpy as np
+import xml.dom.minidom as minidom
+import xml.etree.ElementTree as ET
+
+import sys
+
+sys.path.append('.')
+from anylabeling.app_info import __version__
+
+# ======================================================================= Usage ========================================================================#
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- custom2voc  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2voc                                      #                             
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- voc2custom  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode voc2custom                                      #
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- custom2yolo  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo                                     #                             
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- yolo2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode yolo2custom                                     #
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- custom2coco  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2coco                                     #                             
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- coco2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx.json --dst_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode coco2custom                 #                             
+#                                                                                                                                                      #
+# ======================================================================= Usage ========================================================================#
+
+
+VERSION = __version__
+
+
+class BaseLabelConverter:
+    """Shared state and helpers for the label-format converters.
+
+    Attributes:
+        classes: Class names, one per line of the classes file. The position of
+            a name in this list is used as its class index.
+        custom_data: Skeleton of a custom annotation document. It only exists
+            after ``reset()`` has been called.
+    """
+
+    def __init__(self, classes_file):
+        """Load the class names.
+
+        Args:
+            classes_file: Path to a text file with one class name per line. A
+                falsy value (``None`` or ``''``) yields an empty class list.
+        """
+        if classes_file:
+            # Read as UTF-8 (tolerating a BOM) so that non-ASCII class names are
+            # decoded the same way on every platform; the converters write
+            # their JSON output as UTF-8 as well.
+            with open(classes_file, 'r', encoding='utf-8-sig') as f:
+                self.classes = f.read().splitlines()
+        else:
+            self.classes = []
+
+    def reset(self):
+        """Re-create ``self.custom_data`` as an empty annotation document.
+
+        The document has no shapes, an empty image path and ``-1`` as
+        placeholder for the image width and height.
+        """
+        self.custom_data = dict(
+            version=VERSION,
+            flags={},
+            shapes=[],
+            imagePath="",
+            imageData=None,
+            imageHeight=-1,
+            imageWidth=-1
+        )
+
+    def get_image_size(self, image_file):
+        """Return the ``(width, height)`` of the image stored at *image_file*."""
+        with Image.open(image_file) as img:
+            width, height = img.size
+            return width, height
+
+
+class RectLabelConverter(BaseLabelConverter):
+    """Convert rectangle annotations between the custom JSON format and VOC, YOLO and COCO."""
+
+    @staticmethod
+    def _bounding_box(points):
+        """Return ``(x_min, y_min, x_max, y_max)`` enclosing all *points*.
+
+        Taking the min/max over every point makes the result independent of the
+        order in which the rectangle corners were stored.
+        """
+        xs = [point[0] for point in points]
+        ys = [point[1] for point in points]
+        return min(xs), min(ys), max(xs), max(ys)
+
+    def _class_index(self, label):
+        """Return the index of *label* in ``self.classes``.
+
+        Raises:
+            ValueError: If the label is not listed in the classes file.
+        """
+        try:
+            return self.classes.index(label)
+        except ValueError:
+            raise ValueError(f"Label '{label}' is not listed in the classes file") from None
+
+    def custom_to_voc2017(self, input_file, output_dir):
+        """Convert one custom JSON annotation file to a Pascal VOC style XML file.
+
+        Args:
+            input_file: Path of the custom JSON file to read.
+            output_dir: Path of the XML file to write (a file path, despite the name).
+        """
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        image_path = data['imagePath']
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        root = ET.Element('annotation')
+        ET.SubElement(root, 'folder').text = os.path.dirname(output_dir)
+        ET.SubElement(root, 'filename').text = os.path.basename(image_path)
+        size = ET.SubElement(root, 'size')
+        ET.SubElement(size, 'width').text = str(image_width)
+        ET.SubElement(size, 'height').text = str(image_height)
+        # The depth is written as a fixed 3; the image itself is not inspected
+        ET.SubElement(size, 'depth').text = '3'
+
+        for shape in data['shapes']:
+            # Normalise the corner order so that xmin <= xmax and ymin <= ymax,
+            # as the YOLO and COCO exports already do
+            xmin, ymin, xmax, ymax = self._bounding_box(shape['points'])
+
+            object_elem = ET.SubElement(root, 'object')
+            ET.SubElement(object_elem, 'name').text = shape['label']
+            ET.SubElement(object_elem, 'pose').text = 'Unspecified'
+            ET.SubElement(object_elem, 'truncated').text = '0'
+            ET.SubElement(object_elem, 'difficult').text = '0'
+            bndbox = ET.SubElement(object_elem, 'bndbox')
+            ET.SubElement(bndbox, 'xmin').text = str(xmin)
+            ET.SubElement(bndbox, 'ymin').text = str(ymin)
+            ET.SubElement(bndbox, 'xmax').text = str(xmax)
+            ET.SubElement(bndbox, 'ymax').text = str(ymax)
+
+        xml_string = ET.tostring(root, encoding='utf-8')
+        dom = minidom.parseString(xml_string)
+        formatted_xml = dom.toprettyxml(indent='  ')
+
+        # The XML declaration carries no encoding, which means UTF-8 to XML
+        # parsers, so the file has to be written as UTF-8 explicitly
+        with open(output_dir, 'w', encoding='utf-8') as f:
+            f.write(formatted_xml)
+
+    def voc2017_to_custom(self, input_file, output_file):
+        """Convert one Pascal VOC style XML file to a custom JSON annotation file.
+
+        Args:
+            input_file: Path of the XML file to read.
+            output_file: Path of the JSON file to write.
+        """
+        self.reset()
+
+        tree = ET.parse(input_file)
+        root = tree.getroot()
+
+        self.custom_data['imagePath'] = root.find('filename').text
+        self.custom_data['imageHeight'] = int(root.find('size/height').text)
+        self.custom_data['imageWidth'] = int(root.find('size/width').text)
+
+        for obj in root.findall('object'):
+            xmin = float(obj.find('bndbox/xmin').text)
+            ymin = float(obj.find('bndbox/ymin').text)
+            xmax = float(obj.find('bndbox/xmax').text)
+            ymax = float(obj.find('bndbox/ymax').text)
+
+            self.custom_data['shapes'].append({
+                "label": obj.find('name').text,
+                "text": "",
+                "points": [[xmin, ymin], [xmax, ymax]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            })
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_yolov5(self, input_file, output_file):
+        """Convert one custom JSON annotation file to a YOLO detection label file.
+
+        Each output line is ``class x_center y_center width height`` with all
+        coordinates divided by the image width / height.
+
+        Args:
+            input_file: Path of the custom JSON file to read.
+            output_file: Path of the txt file to write.
+
+        Raises:
+            ValueError: If a shape label is not listed in the classes file.
+        """
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        # Build every line first so that a bad label cannot leave a
+        # half-written label file behind
+        lines = []
+        for shape in data['shapes']:
+            class_index = self._class_index(shape['label'])
+            x_min, y_min, x_max, y_max = self._bounding_box(shape['points'])
+
+            x_center = (x_min + x_max) / (2 * image_width)
+            y_center = (y_min + y_max) / (2 * image_height)
+            width = (x_max - x_min) / image_width
+            height = (y_max - y_min) / image_height
+
+            lines.append(f"{class_index} {x_center} {y_center} {width} {height}\n")
+
+        with open(output_file, 'w') as f:
+            f.writelines(lines)
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        """Convert one YOLO detection label file to a custom JSON annotation file.
+
+        Args:
+            input_file: Path of the YOLO txt file to read.
+            output_file: Path of the JSON file to write.
+            image_file: Path of the matching image; its size is used to turn the
+                normalised coordinates back into pixels.
+        """
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+
+        for line in lines:
+            # split() without argument copes with tabs and repeated spaces
+            fields = line.split()
+            if not fields:
+                # Blank line, e.g. a trailing newline at the end of the file
+                continue
+            class_index = int(fields[0])
+            x_center = float(fields[1])
+            y_center = float(fields[2])
+            width = float(fields[3])
+            height = float(fields[4])
+
+            # Pixel coordinates are truncated to integers
+            x_min = int((x_center - width / 2) * image_width)
+            y_min = int((y_center - height / 2) * image_height)
+            x_max = int((x_center + width / 2) * image_width)
+            y_max = int((y_center + height / 2) * image_height)
+
+            self.custom_data['shapes'].append({
+                "label": self.classes[class_index],
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            })
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_coco(self, input_path, output_path):
+        """Merge all custom JSON files of a folder into one COCO detection file.
+
+        The result is written to ``x_anylabeling_coco.json`` inside *output_path*.
+        Category ids are the 1-based positions of the names in the classes file.
+
+        Args:
+            input_path: Folder containing the custom JSON annotation files.
+            output_path: Existing folder in which the COCO file is created.
+
+        Raises:
+            ValueError: If a shape label is not listed in the classes file.
+        """
+        coco_data = {
+            "info": {
+                "year": 2023,
+                "version": VERSION,
+                "description": "COCO Label Conversion",
+                "contributor": "CVHub",
+                "url": "https://github.com/CVHub520/X-AnyLabeling",
+                "date_created": str(date.today())
+            },
+            "licenses": [
+                {
+                    "id": 1,
+                    "url": "https://www.gnu.org/licenses/gpl-3.0.html",
+                    "name": "GNU GENERAL PUBLIC LICENSE Version 3"
+                }
+            ],
+            "categories": [],
+            "images": [],
+            "annotations": []
+        }
+
+        for i, class_name in enumerate(self.classes):
+            coco_data['categories'].append({
+                "id": i + 1,
+                "name": class_name,
+                "supercategory": ""
+            })
+
+        # Only annotation files are converted; sorting makes the image and
+        # annotation ids reproducible between runs
+        file_list = sorted(name for name in os.listdir(input_path) if name.lower().endswith('.json'))
+
+        annotation_id = 0
+        for image_id, file_name in enumerate(
+                tqdm(file_list, desc='Converting files', unit='file', colour='green'), start=1):
+
+            input_file = os.path.join(input_path, file_name)
+            with open(input_file, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+
+            coco_data['images'].append({
+                "id": image_id,
+                # Keep the extension: COCO consumers open the image through this
+                # name, and coco_to_custom() looks it up in the image folder
+                "file_name": os.path.basename(data['imagePath']),
+                "width": data['imageWidth'],
+                "height": data['imageHeight'],
+                # Refers to the single entry of the "licenses" list above
+                "license": 1,
+                "flickr_url": "",
+                "coco_url": "",
+                "date_captured": ""
+            })
+
+            for shape in data['shapes']:
+                annotation_id += 1
+                class_id = self._class_index(shape['label'])
+                x_min, y_min, x_max, y_max = self._bounding_box(shape['points'])
+                width = x_max - x_min
+                height = y_max - y_min
+
+                coco_data['annotations'].append({
+                    "id": annotation_id,
+                    "image_id": image_id,
+                    "category_id": class_id + 1,
+                    "bbox": [x_min, y_min, width, height],
+                    "area": width * height,
+                    "iscrowd": 0
+                })
+
+        output_file = os.path.join(output_path, "x_anylabeling_coco.json")
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(coco_data, f, indent=4, ensure_ascii=False)
+
+    def coco_to_custom(self, input_file, output_path, image_path):
+        """Split one COCO detection file into one custom JSON file per image.
+
+        Args:
+            input_file: Path of the COCO JSON file to read.
+            output_path: Existing folder in which the custom JSON files are created.
+            image_path: Folder containing the images referenced by the COCO file.
+
+        Raises:
+            KeyError: If an image referenced by the COCO file is not found in
+                *image_path*.
+        """
+        # Index the available images by full name and, for COCO files whose
+        # "file_name" lacks the extension, by stem. Annotation files that may
+        # sit next to the images are kept out of the stem index.
+        images_by_name = {}
+        images_by_stem = {}
+        for file in sorted(os.listdir(image_path)):
+            images_by_name[file] = file
+            stem, suffix = os.path.splitext(file)
+            if suffix.lower() != '.json':
+                images_by_stem.setdefault(stem, file)
+
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        # When a classes file was supplied its names win and are addressed by
+        # the 1-based category id. Otherwise the names stored in the COCO file
+        # are used through the id -> name mapping, which also works for
+        # category ids that are not contiguous.
+        classes_from_file = bool(self.classes)
+        if not classes_from_file:
+            for cat in data["categories"]:
+                self.classes.append(cat["name"])
+
+        # map category_id to name
+        label_info = {cat["id"]: cat["name"] for cat in data["categories"]}
+
+        # map image_id to info
+        total_info = {}
+        for dic_info in data["images"]:
+            file_name = dic_info["file_name"]
+            base_name = os.path.basename(file_name)
+            image_name = (
+                images_by_name.get(file_name)
+                or images_by_name.get(base_name)
+                or images_by_stem.get(base_name)
+            )
+            if image_name is None:
+                raise KeyError(f"Image '{file_name}' referenced by {input_file} was not found in {image_path}")
+            total_info[dic_info["id"]] = {
+                "imageWidth": dic_info["width"],
+                "imageHeight": dic_info["height"],
+                "imagePath": image_name,
+                "shapes": []
+            }
+
+        for dic_info in data["annotations"]:
+            # COCO boxes are [x_min, y_min, width, height]
+            x_min, y_min, width, height = dic_info["bbox"][:4]
+            x_max = x_min + width
+            y_max = y_min + height
+
+            category_id = dic_info["category_id"]
+            if classes_from_file:
+                label = self.classes[category_id - 1]
+            else:
+                label = label_info[category_id]
+
+            total_info[dic_info["image_id"]]["shapes"].append({
+                "label": label,
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            })
+
+        for dic_info in tqdm(total_info.values(), desc='Converting files', unit='file', colour='green'):
+            self.reset()
+            self.custom_data["shapes"] = dic_info["shapes"]
+            self.custom_data["imagePath"] = dic_info["imagePath"]
+            self.custom_data["imageHeight"] = dic_info["imageHeight"]
+            self.custom_data["imageWidth"] = dic_info["imageWidth"]
+
+            output_file = os.path.join(output_path, os.path.splitext(dic_info["imagePath"])[0] + ".json")
+            with open(output_file, 'w', encoding='utf-8') as f:
+                json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+
+class PolyLabelConvert(BaseLabelConverter):
+    """Convert polygon annotations between the custom JSON format and YOLO segmentation labels."""
+
+    def custom_to_yolov5(self, input_file, output_file):
+        """Convert one custom JSON annotation file to a YOLO segmentation label file.
+
+        Each output line is ``class x1 y1 x2 y2 ...`` with all coordinates
+        divided by the image width / height.
+
+        Args:
+            input_file: Path of the custom JSON file to read.
+            output_file: Path of the txt file to write.
+
+        Raises:
+            ValueError: If a shape label is not listed in the classes file.
+        """
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        # Shape (1, 2) so that it broadcasts over the (n, 2) point arrays
+        image_size = np.array([[data['imageWidth'], data['imageHeight']]])
+
+        # Build every line first so that a bad label cannot leave a
+        # half-written label file behind
+        lines = []
+        for shape in data['shapes']:
+            class_index = self.classes.index(shape['label'])
+            norm_points = np.array(shape['points']) / image_size
+            coords = " ".join(f"{x} {y}" for x, y in norm_points.tolist())
+            lines.append(f"{class_index} {coords}\n")
+
+        with open(output_file, 'w') as f:
+            f.writelines(lines)
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        """Convert one YOLO segmentation label file to a custom JSON annotation file.
+
+        Args:
+            input_file: Path of the YOLO txt file to read.
+            output_file: Path of the JSON file to write.
+            image_file: Path of the matching image; its size is used to turn the
+                normalised coordinates back into pixels.
+        """
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+
+        for line in lines:
+            # split() without argument copes with tabs and repeated spaces
+            fields = line.split()
+            if not fields:
+                # Blank line, e.g. a trailing newline at the end of the file
+                continue
+            class_index = int(fields[0])
+            masks = fields[1:]
+            shape = {
+                "label": self.classes[class_index],
+                "text": None,
+                # The points of a segmentation label describe an outline, so
+                # the shape has to be declared as a polygon, not a rectangle
+                "points": [[float(x) * image_width, float(y) * image_height]
+                           for x, y in zip(masks[0::2], masks[1::2])],
+                "group_id": None,
+                "shape_type": "polygon",
+                "flags": {}
+            }
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+
+# Extensions of annotation files that may sit next to the images and must not
+# be mistaken for an image when matching labels to images
+_LABEL_SUFFIXES = ('.json', '.txt', '.xml')
+
+
+def _list_files(folder, suffix):
+    """Return the sorted names of the files in *folder* ending with *suffix* (case-insensitive)."""
+    return sorted(name for name in os.listdir(folder) if name.lower().endswith(suffix))
+
+
+def main():
+    """Command-line entry point: parse the arguments and run the requested conversion."""
+    parser = argparse.ArgumentParser(description='Label Converter')
+    # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo
+    parser.add_argument('--task', default='polygon', choices=['rectangle', 'polygon'],
+                        help='Choose the type of task to perform')
+    parser.add_argument('--src_path', default='custom', help='Path to input directory')
+    parser.add_argument('--dst_path', default='yolo', help='Path to output directory')
+    parser.add_argument('--img_path', help='Path to image directory')
+    parser.add_argument('--classes', default='classes.txt',
+                        help='Path to classes.txt file, where each line represent a specific class')
+    parser.add_argument('--mode', default='custom2yolo', help='Choose the conversion mode what you need',
+                        choices=['custom2voc', 'voc2custom', 'custom2yolo', 'yolo2custom', 'custom2coco',
+                                 'coco2custom'])
+    args = parser.parse_args()
+
+    # Validate with parser.error() rather than assert, which is stripped under
+    # "python -O"
+    valid_modes = ['custom2yolo', 'yolo2custom']
+    if args.task == 'polygon' and args.mode not in valid_modes:
+        parser.error(f"Polygon tasks are only supported in {valid_modes} now!")
+    if args.mode in ('voc2custom', 'yolo2custom', 'coco2custom') and not args.img_path:
+        parser.error(f"--img_path is required for mode {args.mode}")
+
+    print(f"Starting conversion to {args.mode} format of {args.task}...")
+    start_time = time.time()
+
+    if args.task == 'rectangle':
+        converter = RectLabelConverter(args.classes)
+    else:
+        converter = PolyLabelConvert(args.classes)
+
+    # Folder the results end up in; the *2custom modes for single files write
+    # next to the images instead of into --dst_path
+    output_dir = args.dst_path
+
+    if args.mode == "custom2voc":
+        os.makedirs(args.dst_path, exist_ok=True)
+        file_list = _list_files(args.src_path, '.json')
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0] + '.xml')
+            converter.custom_to_voc2017(src_file, dst_file)
+    elif args.mode == "voc2custom":
+        output_dir = args.img_path
+        file_list = _list_files(args.src_path, '.xml')
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0] + '.json')
+            converter.voc2017_to_custom(src_file, dst_file)
+    elif args.mode == "custom2yolo":
+        os.makedirs(args.dst_path, exist_ok=True)
+        file_list = _list_files(args.src_path, '.json')
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0] + '.txt')
+            converter.custom_to_yolov5(src_file, dst_file)
+    elif args.mode == "yolo2custom":
+        output_dir = args.img_path
+        # Map "name without extension" -> image file. The stem is taken with
+        # os.path.splitext on both sides so that names containing dots match,
+        # and annotation files written by an earlier run are ignored.
+        img_dic = {}
+        for file in sorted(os.listdir(args.img_path)):
+            stem, suffix = os.path.splitext(file)
+            if suffix.lower() not in _LABEL_SUFFIXES:
+                img_dic[stem] = file
+
+        skipped = []
+        file_list = _list_files(args.src_path, '.txt')
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            stem = os.path.splitext(file_name)[0]
+            if stem not in img_dic:
+                # e.g. a classes.txt stored next to the label files
+                skipped.append(file_name)
+                continue
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, stem + '.json')
+            img_file = os.path.join(args.img_path, img_dic[stem])
+            converter.yolov5_to_custom(src_file, dst_file, img_file)
+        if skipped:
+            print(f"Skipped {len(skipped)} file(s) without a matching image: {skipped}")
+    elif args.mode == "custom2coco":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.custom_to_coco(args.src_path, args.dst_path)
+    elif args.mode == "coco2custom":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.coco_to_custom(args.src_path, args.dst_path, args.img_path)
+
+    end_time = time.time()
+    print(f"Conversion completed successfully: {output_dir}")
+    print(f"Conversion time: {end_time - start_time:.2f} seconds")
+
+
+if __name__ == '__main__':
+    main()

二进制
tof3D/labels.cache


+ 78 - 0
tof3D/split_train_val.py

@@ -0,0 +1,78 @@
+# coding:utf-8
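+# brief: randomly split the label files found under the given directory into training and validation/test sets, and write the corresponding image paths to txt files in the dataSet directory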
+
+import os
+import random
+import json
+
+
+def SearchFiles(directory, fileType):
+    """Recursively collect the names of the files under *directory* ending with *fileType*.
+
+    Args:
+        directory: Root directory to walk.
+        fileType: File-name suffix to match, e.g. ``'.txt'``.
+
+    Returns:
+        Sorted list of the matching file names. Only the base name is
+        returned; the sub-directory a file was found in is not part of the
+        result.
+    """
+    fileList = []
+    for _root, _subDirs, files in os.walk(directory):
+        fileList.extend(fileName for fileName in files if fileName.endswith(fileType))
+    # The order produced by os.walk depends on the file system; sort so that
+    # the result (and any split derived from it) is reproducible
+    fileList.sort()
+    return fileList
+
+
+if __name__ == '__main__':
+    run_path = 'D:/DeepLearning/pytorch-gpu117/tof3D/'
+    # Prefix written in front of every image name in the generated lists.
+    # NOTE(review): this is a Linux path while run_path is a Windows path;
+    # confirm which machine the lists are meant for.
+    last_file_path = '/home/zx/doc/private_hub/yolov8/ultralytics-main/examples/train_DS77/labels/'
+    # last_file_path = 'D:/DeepLearning/pytorch-gpu117/tof3D/labels/'
+    txt_save_path = run_path + 'dataSet'
+    os.makedirs(txt_save_path, exist_ok=True)
+
+    # Fraction of all samples used for train + val (the rest goes to test)
+    trainval_percent = 1
+    # Fraction of the train + val samples used for training
+    train_percent = 0.9
+
+    label_files = SearchFiles(run_path + 'labels', '.txt')
+
+    print(label_files)
+
+    num = len(label_files)
+    list_index = range(num)
+    tv = int(num * trainval_percent)
+    tr = int(tv * train_percent)
+    trainval = random.sample(list_index, tv)
+    train = random.sample(trainval, tr)
+    # Sets give constant-time membership tests in the loop below
+    trainval_set = set(trainval)
+    train_set = set(train)
+
+    # Collect the lines per output file, then write each file in one go so
+    # that every handle is closed even if something fails half-way
+    splits = {'trainval': [], 'test': [], 'train': [], 'val': []}
+    for i, label_name in enumerate(label_files):
+        # Every label file "<name>.txt" stands for the image "<name>.jpg"
+        name = last_file_path + os.path.splitext(label_name)[0] + '.jpg\n'
+        if i in trainval_set:
+            splits['trainval'].append(name)
+            if i in train_set:
+                splits['train'].append(name)
+            else:
+                splits['val'].append(name)
+        else:
+            splits['test'].append(name)
+
+    for split_name, lines in splits.items():
+        with open(os.path.join(txt_save_path, split_name + '.txt'), 'w') as split_file:
+            split_file.writelines(lines)

+ 0 - 0
tof3D/train.py


+ 46 - 0
tof3D/yolov8-seg.yaml

@@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 2  # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Segment(P3, P4, P5)

+ 478 - 0
xm_lidar/label_converter.py

@@ -0,0 +1,478 @@
+import argparse
+import json
+import os
+import time
+
+from PIL import Image
+from tqdm import tqdm
+from datetime import date
+
+import numpy as np
+import xml.dom.minidom as minidom
+import xml.etree.ElementTree as ET
+
+import sys
+
+sys.path.append('.')
+from anylabeling.app_info import __version__
+
+# ======================================================================= Usage ========================================================================#
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- custom2voc  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2voc                                      #                             
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- voc2custom  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode voc2custom                                      #
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- custom2yolo  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo                                     #                             
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- yolo2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode yolo2custom                                     #
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- custom2coco  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2coco                                     #                             
+#                                                                                                                                                      #
+# -------------------------------------------------------------------- coco2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx.json --dst_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode coco2custom                 #                             
+#                                                                                                                                                      #
+# ======================================================================= Usage ========================================================================#
+
+
+# Version string of the anylabeling package; stored in every generated custom JSON file
+# and in the "info" section of the COCO export.
+VERSION = __version__
+
+
+class BaseLabelConverter:
+    def __init__(self, classes_file):
+
+        if classes_file:
+            with open(classes_file, 'r') as f:
+                self.classes = f.read().splitlines()
+        else:
+            self.classes = []
+
+    def reset(self):
+        self.custom_data = dict(
+            version=VERSION,
+            flags={},
+            shapes=[],
+            imagePath="",
+            imageData=None,
+            imageHeight=-1,
+            imageWidth=-1
+        )
+
+    def get_image_size(self, image_file):
+        with Image.open(image_file) as img:
+            width, height = img.size
+            return width, height
+
+
+class RectLabelConverter(BaseLabelConverter):
+
+    def custom_to_voc2017(self, input_file, output_dir):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_path = data['imagePath']
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        root = ET.Element('annotation')
+        ET.SubElement(root, 'folder').text = os.path.dirname(output_dir)
+        ET.SubElement(root, 'filename').text = os.path.basename(image_path)
+        size = ET.SubElement(root, 'size')
+        ET.SubElement(size, 'width').text = str(image_width)
+        ET.SubElement(size, 'height').text = str(image_height)
+        ET.SubElement(size, 'depth').text = '3'
+
+        for shape in data['shapes']:
+            label = shape['label']
+            points = shape['points']
+
+            xmin = str(points[0][0])
+            ymin = str(points[0][1])
+            xmax = str(points[1][0])
+            ymax = str(points[1][1])
+
+            object_elem = ET.SubElement(root, 'object')
+            ET.SubElement(object_elem, 'name').text = label
+            ET.SubElement(object_elem, 'pose').text = 'Unspecified'
+            ET.SubElement(object_elem, 'truncated').text = '0'
+            ET.SubElement(object_elem, 'difficult').text = '0'
+            bndbox = ET.SubElement(object_elem, 'bndbox')
+            ET.SubElement(bndbox, 'xmin').text = xmin
+            ET.SubElement(bndbox, 'ymin').text = ymin
+            ET.SubElement(bndbox, 'xmax').text = xmax
+            ET.SubElement(bndbox, 'ymax').text = ymax
+
+        xml_string = ET.tostring(root, encoding='utf-8')
+        dom = minidom.parseString(xml_string)
+        formatted_xml = dom.toprettyxml(indent='  ')
+
+        with open(output_dir, 'w') as f:
+            f.write(formatted_xml)
+
+    def voc2017_to_custom(self, input_file, output_file):
+        self.reset()
+
+        tree = ET.parse(input_file)
+        root = tree.getroot()
+
+        image_path = root.find('filename').text
+        image_width = int(root.find('size/width').text)
+        image_height = int(root.find('size/height').text)
+
+        self.custom_data['imagePath'] = image_path
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        for obj in root.findall('object'):
+            label = obj.find('name').text
+            xmin = float(obj.find('bndbox/xmin').text)
+            ymin = float(obj.find('bndbox/ymin').text)
+            xmax = float(obj.find('bndbox/xmax').text)
+            ymax = float(obj.find('bndbox/ymax').text)
+
+            shape = {
+                "label": label,
+                "text": "",
+                "points": [[xmin, ymin], [xmax, ymax]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = shape['points']
+
+                class_index = self.classes.index(label)
+
+                x_center = (points[0][0] + points[1][0]) / (2 * image_width)
+                y_center = (points[0][1] + points[1][1]) / (2 * image_height)
+                width = abs(points[1][0] - points[0][0]) / image_width
+                height = abs(points[1][1] - points[0][1]) / image_height
+
+                f.write(f"{class_index} {x_center} {y_center} {width} {height}\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            x_center = float(line[1])
+            y_center = float(line[2])
+            width = float(line[3])
+            height = float(line[4])
+
+            x_min = int((x_center - width / 2) * image_width)
+            y_min = int((y_center - height / 2) * image_height)
+            x_max = int((x_center + width / 2) * image_width)
+            y_max = int((y_center + height / 2) * image_height)
+
+            label = self.classes[class_index]
+
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_coco(self, input_path, output_path):
+        coco_data = {
+            "info": {
+                "year": 2023,
+                "version": VERSION,
+                "description": "COCO Label Conversion",
+                "contributor": "CVHub",
+                "url": "https://github.com/CVHub520/X-AnyLabeling",
+                "date_created": str(date.today())
+            },
+            "licenses": [
+                {
+                    "id": 1,
+                    "url": "https://www.gnu.org/licenses/gpl-3.0.html",
+                    "name": "GNU GENERAL PUBLIC LICENSE Version 3"
+                }
+            ],
+            "categories": [],
+            "images": [],
+            "annotations": []
+        }
+
+        for i, class_name in enumerate(self.classes):
+            coco_data['categories'].append({
+                "id": i + 1,
+                "name": class_name,
+                "supercategory": ""
+            })
+
+        image_id = 0
+        annotation_id = 0
+
+        file_list = os.listdir(input_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+
+            image_id += 1
+
+            input_file = os.path.join(input_path, file_name)
+            with open(input_file, 'r') as f:
+                data = json.load(f)
+
+            image_path = data['imagePath']
+            image_name = os.path.splitext(os.path.basename(image_path))[0]
+
+            coco_data['images'].append({
+                "id": image_id,
+                "file_name": image_name,
+                "width": data['imageWidth'],
+                "height": data['imageHeight'],
+                "license": 0,
+                "flickr_url": "",
+                "coco_url": "",
+                "date_captured": ""
+            })
+
+            for shape in data['shapes']:
+                annotation_id += 1
+                label = shape['label']
+                points = shape['points']
+                class_id = self.classes.index(label)
+                x_min = min(points[0][0], points[1][0])
+                y_min = min(points[0][1], points[1][1])
+                x_max = max(points[0][0], points[1][0])
+                y_max = max(points[0][1], points[1][1])
+                width = x_max - x_min
+                height = y_max - y_min
+
+                annotation = {
+                    "id": annotation_id,
+                    "image_id": image_id,
+                    "category_id": class_id + 1,
+                    "bbox": [x_min, y_min, width, height],
+                    "area": width * height,
+                    "iscrowd": 0
+                }
+
+                coco_data['annotations'].append(annotation)
+
+        output_file = os.path.join(output_path, "x_anylabeling_coco.json")
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(coco_data, f, indent=4, ensure_ascii=False)
+
+    def coco_to_custom(self, input_file, output_path, image_path):
+
+        img_dic = {}
+        for file in os.listdir(image_path):
+            img_dic[file] = file
+
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        if not self.classes:
+            for cat in data["categories"]:
+                self.classes.append(cat["name"])
+
+        total_info, label_info = {}, {}
+
+        # map category_id to name
+        for dic_info in data["categories"]:
+            label_info[dic_info["id"]] = dic_info["name"]
+
+        # map image_id to info
+        for dic_info in data["images"]:
+            total_info[dic_info["id"]] = {
+                "imageWidth": dic_info["width"],
+                "imageHeight": dic_info["height"],
+                "imagePath": img_dic[dic_info["file_name"]],
+                "shapes": []
+            }
+
+        for dic_info in data["annotations"]:
+            bbox = dic_info["bbox"]
+            x_min = bbox[0]
+            y_min = bbox[1]
+            width = bbox[2]
+            height = bbox[3]
+            x_max = x_min + width
+            y_max = y_min + height
+
+            shape_info = {
+                "label": self.classes[dic_info["category_id"] - 1],
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            total_info[dic_info["image_id"]]["shapes"].append(shape_info)
+
+        for dic_info in tqdm(total_info.values(), desc='Converting files', unit='file', colour='green'):
+            self.reset()
+            self.custom_data["shapes"] = dic_info["shapes"]
+            self.custom_data["imagePath"] = dic_info["imagePath"]
+            self.custom_data["imageHeight"] = dic_info["imageHeight"]
+            self.custom_data["imageWidth"] = dic_info["imageWidth"]
+
+            output_file = os.path.join(output_path, os.path.splitext(dic_info["imagePath"])[0] + ".json")
+            with open(output_file, 'w', encoding='utf-8') as f:
+                json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+
+class PolyLabelConvert(BaseLabelConverter):
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+        image_size = np.array([[image_width, image_height]])
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = np.array(shape['points'])
+                class_index = self.classes.index(label)
+                norm_points = points / image_size
+                f.write(f"{class_index} " + " ".join(
+                    [" ".join([str(cell[0]), str(cell[1])]) for cell in norm_points.tolist()]) + "\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+        image_size = np.array([image_width, image_height], np.float64)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            label = self.classes[class_index]
+            masks = line[1:]
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+            for x, y in zip(masks[0::2], masks[1::2]):
+                point = [np.float64(x), np.float64(y)]
+                point = np.array(point, np.float64) * image_size
+                shape['points'].append(point.tolist())
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Label Converter')
+    # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo
+    parser.add_argument('--task', default='polygon', choices=['rectangle', 'polygon'],
+                        help='Choose the type of task to perform')
+    parser.add_argument('--src_path', default='custom', help='Path to input directory')
+    parser.add_argument('--dst_path', default='yolo', help='Path to output directory')
+    parser.add_argument('--img_path', help='Path to image directory')
+    parser.add_argument('--classes', default='classes.txt',
+                        help='Path to classes.txt file, where each line represent a specific class')
+    parser.add_argument('--mode', default='custom2yolo', help='Choose the conversion mode what you need',
+                        choices=['custom2voc', 'voc2custom', 'custom2yolo', 'yolo2custom', 'custom2coco',
+                                 'coco2custom'])
+    args = parser.parse_args()
+    print(f"Starting conversion to {args.mode} format of {args.task}...")
+    start_time = time.time()
+
+    if args.task == 'rectangle':
+        converter = RectLabelConverter(args.classes)
+    elif args.task == 'polygon':
+        converter = PolyLabelConvert(args.classes)
+        valid_modes = ['custom2yolo', 'yolo2custom']
+        assert args.mode in valid_modes, f"Polygon tasks are only supported in {valid_modes} now!"
+
+    if args.mode == "custom2voc":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0] + '.xml')
+            converter.custom_to_voc2017(src_file, dst_file)
+    elif args.mode == "voc2custom":
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0] + '.json')
+            converter.voc2017_to_custom(src_file, dst_file)
+    elif args.mode == "custom2yolo":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0] + '.txt')
+            converter.custom_to_yolov5(src_file, dst_file)
+    elif args.mode == "yolo2custom":
+        img_dic = {}
+        for file in os.listdir(args.img_path):
+            prefix = file.split('.')[0]
+            img_dic[prefix] = file
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0] + '.json')
+            img_file = os.path.join(args.img_path, img_dic[os.path.splitext(file_name)[0]])
+            converter.yolov5_to_custom(src_file, dst_file, img_file)
+    elif args.mode == "custom2coco":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.custom_to_coco(args.src_path, args.dst_path)
+    elif args.mode == "coco2custom":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.coco_to_custom(args.src_path, args.dst_path, args.img_path)
+
+    end_time = time.time()
+    print(f"Conversion completed successfully: {args.dst_path}")
+    print(f"Conversion time: {end_time - start_time:.2f} seconds")
+
+
+if __name__ == '__main__':
+    main()

+ 78 - 0
xm_lidar/split_train_val.py

@@ -0,0 +1,78 @@
+# coding:utf-8
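+# brief: randomly split the label files found under the given directory into training, validation and test sets, and write the resulting paths to txt files in the dataSet directory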
+
+import os
+import random
+import json
+
+
+# directory-主路径
+# fileType-指定文件类型
+# fileList-目标类型文件列表(路径+文件名)
+def SearchFiles(directory, fileType):
+    fileList = []
+    for root, subDirs, files in os.walk(directory):
+        for fileName in files:
+            if fileName.endswith(fileType):
+                # json_file = open(directory + '/' + fileName, 'r', encoding='UTF-8')
+                # json_data = json.load(json_file)
+                # jsonName = json_data['imagePath']
+                # jsonName = jsonName.replace('汽车 ', 'car')
+                # jsonName = jsonName.replace('(', '_')
+                # jsonName = jsonName.replace(')', '')
+                # json_data['imagePath'] = jsonName
+                # json_file = open(directory + '/' + fileName, 'w', encoding='UTF-8')
+                # json.dump(json_data, json_file, indent=2, ensure_ascii=False)
+                # print(jsonName)
+                fileList.append(fileName)
+    # for fileName in fileList:
+    #     if fileName.find('汽车 '):
+    #         newName = fileName.replace('汽车 ', 'car')
+    #         newName = newName.replace('(', '_')
+    #         newName = newName.replace(')', '')
+    #         os.rename(fileName, newName)
+    return fileList
+
+
+if __name__ == '__main__':
+    run_path = 'D:/DeepLearning/pytorch-gpu117/xm_lidar/'
+    last_file_path = '/home/zx/doc/private_hub/yolov8/ultralytics-main/examples/train_xm_lidar/labels/'
+    # last_file_path = 'D:/DeepLearning/pytorch-gpu117/yolov8_study/train-seg/labels/'
+    txt_save_path = run_path + 'dataSet'
+    if not os.path.exists(txt_save_path):
+        os.makedirs(txt_save_path)
+
+    trainval_percent = 1
+    train_percent = 0.9
+
+    total_json = SearchFiles(run_path + 'labels', '.txt')
+
+    print(total_json)
+
+    num = len(total_json)
+    list_index = range(num)
+    tv = int(num * trainval_percent)
+    tr = int(tv * train_percent)
+    trainval = random.sample(list_index, tv)
+    train = random.sample(trainval, tr)
+
+    file_trainval = open(txt_save_path + '/trainval.txt', 'w')
+    file_test = open(txt_save_path + '/test.txt', 'w')
+    file_train = open(txt_save_path + '/train.txt', 'w')
+    file_val = open(txt_save_path + '/val.txt', 'w')
+
+    for i in list_index:
+        name = last_file_path + total_json[i][:-3] + 'jpg\n'
+        if i in trainval:
+            file_trainval.write(name)
+            if i in train:
+                file_train.write(name)
+            else:
+                file_val.write(name)
+        else:
+            file_test.write(name)
+
+    file_trainval.close()
+    file_train.close()
+    file_val.close()
+    file_test.close()

+ 46 - 0
xm_lidar/yolov8-seg.yaml

@@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 2  # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Segment(P3, P4, P5)

+ 18 - 0
yolov8_study/detect.py

@@ -0,0 +1,18 @@
+import os
+import os
+from ultralytics import YOLO
+
+# model = YOLO("data.yaml")
+model = YOLO("runs/train-seg2/weights/last.pt")
+# success = model.export(format="onnx", half=False, dynamic=True, opset=17)
+success = model.export(format="onnx")
+print("export model well.")
+exit(1)
+data_file = "D:\DeepLearning\pytorch-gpu117\yolov8_study\\train-seg\data.yaml"
+model_file = "D:\DeepLearning\pytorch-gpu117\yolov8_study\\train-seg\yolov8-seg.yaml"
+model = YOLO(model_file)
+results = model.train(data=data_file, epochs=1000, batch=4)
+
+# ret = os.system("yolo task=detect mode=predict model=best.pt source=D:/DeepLearning/Dataset/car/car_201.jpg imgsz=640 show=True save=True")
+# # yolo mode=export model=best.pt format=onnx dynamic=False #simplify=True
+# print(ret)

+ 8 - 0
yolov8_study/train-seg/data.yaml

@@ -0,0 +1,8 @@
+train: "D:/DeepLearning/pytorch-gpu117/yolov8_study/train-seg/dataSet/train.txt"
+val: "D:/DeepLearning/pytorch-gpu117/yolov8_study/train-seg/dataSet/val.txt"
+
+names:
+  0: car
+  1: wheel
+
+

+ 471 - 0
yolov8_study/train-seg/label_converter.py

@@ -0,0 +1,471 @@
+import argparse
+import json
+import os
+import time
+
+from PIL import Image
+from tqdm import tqdm
+from datetime import date
+
+import numpy as np
+import xml.dom.minidom as minidom
+import xml.etree.ElementTree as ET
+
+import sys
+sys.path.append('.')
+from anylabeling.app_info import __version__
+
+
+#======================================================================= Usage ========================================================================#
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2voc  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2voc                                      #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- voc2custom  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode voc2custom                                      #
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2yolo  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo                                     #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- yolo2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode yolo2custom                                     #
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2coco  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2coco                                     #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- coco2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx.json --dst_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode coco2custom                 #                             
+#                                                                                                                                                      #
+#======================================================================= Usage ========================================================================#
+
+
+VERSION = __version__
+
+
+class BaseLabelConverter:
+    def __init__(self, classes_file):
+
+        if classes_file:
+            with open(classes_file, 'r') as f:
+                self.classes = f.read().splitlines()
+        else:
+            self.classes = []
+
+    def reset(self):
+        self.custom_data = dict(
+            version=VERSION,
+            flags={},
+            shapes=[],
+            imagePath="",
+            imageData=None,
+            imageHeight=-1,
+            imageWidth=-1
+        )
+
+    def get_image_size(self, image_file):
+        with Image.open(image_file) as img:
+            width, height = img.size
+            return width, height
+
+class RectLabelConverter(BaseLabelConverter):
+
+    def custom_to_voc2017(self, input_file, output_dir):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_path = data['imagePath']
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        root = ET.Element('annotation')
+        ET.SubElement(root, 'folder').text = os.path.dirname(output_dir)
+        ET.SubElement(root, 'filename').text = os.path.basename(image_path)
+        size = ET.SubElement(root, 'size')
+        ET.SubElement(size, 'width').text = str(image_width)
+        ET.SubElement(size, 'height').text = str(image_height)
+        ET.SubElement(size, 'depth').text = '3'
+
+        for shape in data['shapes']:
+            label = shape['label']
+            points = shape['points']
+
+            xmin = str(points[0][0])
+            ymin = str(points[0][1])
+            xmax = str(points[1][0])
+            ymax = str(points[1][1])
+
+            object_elem = ET.SubElement(root, 'object')
+            ET.SubElement(object_elem, 'name').text = label
+            ET.SubElement(object_elem, 'pose').text = 'Unspecified'
+            ET.SubElement(object_elem, 'truncated').text = '0'
+            ET.SubElement(object_elem, 'difficult').text = '0'
+            bndbox = ET.SubElement(object_elem, 'bndbox')
+            ET.SubElement(bndbox, 'xmin').text = xmin
+            ET.SubElement(bndbox, 'ymin').text = ymin
+            ET.SubElement(bndbox, 'xmax').text = xmax
+            ET.SubElement(bndbox, 'ymax').text = ymax
+
+        xml_string = ET.tostring(root, encoding='utf-8')
+        dom = minidom.parseString(xml_string)
+        formatted_xml = dom.toprettyxml(indent='  ')
+
+        with open(output_dir, 'w') as f:
+            f.write(formatted_xml)
+
+    def voc2017_to_custom(self, input_file, output_file):
+        self.reset()
+
+        tree = ET.parse(input_file)
+        root = tree.getroot()
+
+        image_path = root.find('filename').text
+        image_width = int(root.find('size/width').text)
+        image_height = int(root.find('size/height').text)
+
+        self.custom_data['imagePath'] = image_path
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        for obj in root.findall('object'):
+            label = obj.find('name').text
+            xmin = float(obj.find('bndbox/xmin').text)
+            ymin = float(obj.find('bndbox/ymin').text)
+            xmax = float(obj.find('bndbox/xmax').text)
+            ymax = float(obj.find('bndbox/ymax').text)
+
+            shape = {
+                "label": label,
+                "text": "",
+                "points": [[xmin, ymin], [xmax, ymax]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = shape['points']
+
+                class_index = self.classes.index(label)
+
+                x_center = (points[0][0] + points[1][0]) / (2 * image_width)
+                y_center = (points[0][1] + points[1][1]) / (2 * image_height)
+                width = abs(points[1][0] - points[0][0]) / image_width
+                height = abs(points[1][1] - points[0][1]) / image_height
+
+                f.write(f"{class_index} {x_center} {y_center} {width} {height}\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            x_center = float(line[1])
+            y_center = float(line[2])
+            width = float(line[3])
+            height = float(line[4])
+
+            x_min = int((x_center - width / 2) * image_width)
+            y_min = int((y_center - height / 2) * image_height)
+            x_max = int((x_center + width / 2) * image_width)
+            y_max = int((y_center + height / 2) * image_height)
+
+            label = self.classes[class_index]
+
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_coco(self, input_path, output_path):
+        coco_data = {
+            "info": {
+                "year": 2023,
+                "version": VERSION,
+                "description": "COCO Label Conversion",
+                "contributor": "CVHub",
+                "url": "https://github.com/CVHub520/X-AnyLabeling",
+                "date_created": str(date.today())
+            },
+            "licenses": [
+                {
+                    "id": 1,
+                    "url": "https://www.gnu.org/licenses/gpl-3.0.html",
+                    "name": "GNU GENERAL PUBLIC LICENSE Version 3"
+                }
+            ],
+            "categories": [],
+            "images": [],
+            "annotations": []
+        }
+
+        for i, class_name in enumerate(self.classes):
+            coco_data['categories'].append({
+                "id": i+1,
+                "name": class_name,
+                "supercategory": ""
+            })
+
+        image_id = 0
+        annotation_id = 0
+
+        file_list = os.listdir(input_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+
+            image_id += 1
+
+            input_file = os.path.join(input_path, file_name)
+            with open(input_file, 'r') as f:
+                data = json.load(f)
+
+            image_path = data['imagePath']
+            image_name = os.path.splitext(os.path.basename(image_path))[0]
+
+            coco_data['images'].append({
+                "id": image_id,
+                "file_name": image_name,
+                "width": data['imageWidth'],
+                "height": data['imageHeight'],
+                "license": 0,
+                "flickr_url": "",
+                "coco_url": "",
+                "date_captured": ""
+            })
+
+            for shape in data['shapes']:
+                annotation_id += 1
+                label = shape['label']
+                points = shape['points']
+                class_id = self.classes.index(label)
+                x_min = min(points[0][0], points[1][0])
+                y_min = min(points[0][1], points[1][1])
+                x_max = max(points[0][0], points[1][0])
+                y_max = max(points[0][1], points[1][1])
+                width = x_max - x_min
+                height = y_max - y_min
+
+                annotation = {
+                    "id": annotation_id,
+                    "image_id": image_id,
+                    "category_id": class_id+1,
+                    "bbox": [x_min, y_min, width, height],
+                    "area": width * height,
+                    "iscrowd": 0
+                }
+
+                coco_data['annotations'].append(annotation)
+
+        output_file = os.path.join(output_path, "x_anylabeling_coco.json")
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(coco_data, f, indent=4, ensure_ascii=False)
+
+    def coco_to_custom(self, input_file, output_path, image_path):
+
+        img_dic = {}
+        for file in os.listdir(image_path):
+            img_dic[file] = file
+
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+            
+        if not self.classes:
+            for cat in data["categories"]:
+                self.classes.append(cat["name"])
+
+        total_info, label_info = {}, {}
+
+        # map category_id to name
+        for dic_info in data["categories"]:
+            label_info[dic_info["id"]] = dic_info["name"]
+
+        # map image_id to info
+        for dic_info in data["images"]:
+            total_info[dic_info["id"]] = {
+                "imageWidth": dic_info["width"],
+                "imageHeight": dic_info["height"],
+                "imagePath": img_dic[dic_info["file_name"]],
+                "shapes": []
+            }
+        
+        for dic_info in data["annotations"]:
+
+            bbox = dic_info["bbox"]
+            x_min = bbox[0]
+            y_min = bbox[1]
+            width = bbox[2]
+            height = bbox[3]
+            x_max = x_min + width
+            y_max = y_min + height
+
+            shape_info = {
+                "label": self.classes[dic_info["category_id"]-1],
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            total_info[dic_info["image_id"]]["shapes"].append(shape_info)
+    
+        for dic_info in tqdm(total_info.values(), desc='Converting files', unit='file', colour='green'):
+            self.reset()
+            self.custom_data["shapes"] = dic_info["shapes"]
+            self.custom_data["imagePath"] = dic_info["imagePath"]
+            self.custom_data["imageHeight"] = dic_info["imageHeight"]
+            self.custom_data["imageWidth"] = dic_info["imageWidth"]
+
+            output_file = os.path.join(output_path, os.path.splitext(dic_info["imagePath"])[0]+".json")
+            with open(output_file, 'w', encoding='utf-8') as f:
+                json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+class PolyLabelConvert(BaseLabelConverter):
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+        image_size = np.array([[image_width, image_height]])
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = np.array(shape['points'])
+                class_index = self.classes.index(label)
+                norm_points = points / image_size
+                f.write(f"{class_index} " + " ".join([" ".join([str(cell[0]), str(cell[1])]) for cell in norm_points.tolist()]) + "\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+        image_size = np.array([image_width, image_height], np.float64)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            label = self.classes[class_index]
+            masks = line[1:]
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+            for x, y in zip(masks[0::2], masks[1::2]):
+                point = [np.float64(x), np.float64(y)]
+                point = np.array(point, np.float64) * image_size
+                shape['points'].append(point.tolist())
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+def main():
+    parser = argparse.ArgumentParser(description='Label Converter')
+    # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo
+    parser.add_argument('--task', default='polygon', choices=['rectangle', 'polygon'], help='Choose the type of task to perform')
+    parser.add_argument('--src_path', default='custom', help='Path to input directory')
+    parser.add_argument('--dst_path', default='yolo', help='Path to output directory')
+    parser.add_argument('--img_path', help='Path to image directory')
+    parser.add_argument('--classes', default='classes.txt', help='Path to classes.txt file, where each line represent a specific class')
+    parser.add_argument('--mode', default='custom2yolo', help='Choose the conversion mode what you need',
+                        choices=['custom2voc', 'voc2custom', 'custom2yolo', 'yolo2custom', 'custom2coco', 'coco2custom'])
+    args = parser.parse_args()
+    print(f"Starting conversion to {args.mode} format of {args.task}...")
+    start_time = time.time()
+
+    if args.task == 'rectangle':
+        converter = RectLabelConverter(args.classes)
+    elif args.task == 'polygon':
+        converter = PolyLabelConvert(args.classes)
+        valid_modes = ['custom2yolo', 'yolo2custom']
+        assert args.mode in valid_modes, f"Polygon tasks are only supported in {valid_modes} now!"
+
+    if args.mode == "custom2voc":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0]+'.xml')
+            converter.custom_to_voc2017(src_file, dst_file)
+    elif args.mode == "voc2custom":
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0]+'.json')
+            converter.voc2017_to_custom(src_file, dst_file)
+    elif args.mode == "custom2yolo":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0]+'.txt')
+            converter.custom_to_yolov5(src_file, dst_file)
+    elif args.mode == "yolo2custom":
+        img_dic = {}
+        for file in os.listdir(args.img_path):
+            prefix = file.split('.')[0]
+            img_dic[prefix] = file
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0]+'.json')
+            img_file = os.path.join(args.img_path, img_dic[os.path.splitext(file_name)[0]])
+            converter.yolov5_to_custom(src_file, dst_file, img_file)
+    elif args.mode == "custom2coco":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.custom_to_coco(args.src_path, args.dst_path)
+    elif args.mode == "coco2custom":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.coco_to_custom(args.src_path, args.dst_path, args.img_path)
+
+    end_time = time.time()
+    print(f"Conversion completed successfully: {args.dst_path}")
+    print(f"Conversion time: {end_time - start_time:.2f} seconds")
+
+if __name__ == '__main__':
+    main()

二进制
yolov8_study/train-seg/labels.cache


+ 78 - 0
yolov8_study/train-seg/split_train_val.py

@@ -0,0 +1,78 @@
+# coding:utf-8
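+# brief: randomly split the label files found in the given directory into training and validation/test sets, and write the image paths to txt files in the dataSet directory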
+ 
+import os
+import random
+import json
+
+
+# directory-主路径
+# fileType-指定文件类型
+# fileList-目标类型文件列表(路径+文件名)
+def SearchFiles(directory, fileType):
+    fileList=[]
+    for root, subDirs, files in os.walk(directory):
+        for fileName in files:
+            if fileName.endswith(fileType):
+                # json_file = open(directory + '/' + fileName, 'r', encoding='UTF-8')
+                # json_data = json.load(json_file)
+                # jsonName = json_data['imagePath']
+                # jsonName = jsonName.replace('汽车 ', 'car')
+                # jsonName = jsonName.replace('(', '_')
+                # jsonName = jsonName.replace(')', '')
+                # json_data['imagePath'] = jsonName
+                # json_file = open(directory + '/' + fileName, 'w', encoding='UTF-8')
+                # json.dump(json_data, json_file, indent=2, ensure_ascii=False)
+                # print(jsonName)
+                fileList.append(fileName)
+    # for fileName in fileList:
+    #     if fileName.find('汽车 '):
+    #         newName = fileName.replace('汽车 ', 'car')
+    #         newName = newName.replace('(', '_')
+    #         newName = newName.replace(')', '')
+    #         os.rename(fileName, newName)
+    return fileList
+
+
+if __name__ == '__main__':
+    run_path = 'D:/DeepLearning/pytorch-gpu117/yolov8_study/train-seg/'
+    # last_file_path = '/home/zx/doc/private_hub/yolov8/ultralytics-main/examples/train/labels/'
+    last_file_path = 'D:/DeepLearning/pytorch-gpu117/yolov8_study/train-seg/labels/'
+    txt_save_path = run_path + 'dataSet'
+    if not os.path.exists(txt_save_path):
+        os.makedirs(txt_save_path)
+
+    trainval_percent = 1
+    train_percent = 0.9
+
+    total_json = SearchFiles(run_path + 'labels', '.txt')
+
+    print(total_json)
+
+    num = len(total_json)
+    list_index = range(num)
+    tv = int(num * trainval_percent)
+    tr = int(tv * train_percent)
+    trainval = random.sample(list_index, tv)
+    train = random.sample(trainval, tr)
+
+    file_trainval = open(txt_save_path + '/trainval.txt', 'w')
+    file_test = open(txt_save_path + '/test.txt', 'w')
+    file_train = open(txt_save_path + '/train.txt', 'w')
+    file_val = open(txt_save_path + '/val.txt', 'w')
+
+    for i in list_index:
+        name = last_file_path + total_json[i][:-3] + 'jpg\n'
+        if i in trainval:
+            file_trainval.write(name)
+            if i in train:
+                file_train.write(name)
+            else:
+                file_val.write(name)
+        else:
+            file_test.write(name)
+
+    file_trainval.close()
+    file_train.close()
+    file_val.close()
+    file_test.close()

+ 0 - 0
yolov8_study/train-seg/warning.md


+ 46 - 0
yolov8_study/train-seg/yolov8-seg.yaml

@@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 2  # number of classes (the data.yaml next to this file names two: car and wheel)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  # "from" is the index of the input layer, -1 meaning the previous layer.
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]  # 2
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]  # 4
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]  # 6
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]  # 8
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 10
+  - [[-1, 6], 1, Concat, [1]]  # 11, cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 13
+  - [[-1, 4], 1, Concat, [1]]  # 14, cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]  # 16
+  - [[-1, 12], 1, Concat, [1]]  # 17, cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]  # 19
+  - [[-1, 9], 1, Concat, [1]]  # 20, cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  # NOTE(review): the arguments are presumably class count, number of mask
+  # coefficients and prototype channels - confirm against the Segment module.
+  - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # 22, Segment(P3, P4, P5)

+ 8 - 0
yolov8_study/train/data.yaml

@@ -0,0 +1,8 @@
+# Dataset description for the train experiment.
+# train/val point at text files in the dataSet directory; presumably each lists
+# one image path per line, as for the train-seg experiment - the script that
+# writes them for this folder is not shown here.
+train: "D:/DeepLearning/pytorch-gpu117/yolov8_study/train/dataSet/train.txt"
+val: "D:/DeepLearning/pytorch-gpu117/yolov8_study/train/dataSet/val.txt"
+
+# Class index -> class name.
+names:
+  0: car
+  1: wheel
+
+

+ 471 - 0
yolov8_study/train/label_converter.py

@@ -0,0 +1,471 @@
+import argparse
+import json
+import os
+import time
+
+from PIL import Image
+from tqdm import tqdm
+from datetime import date
+
+import numpy as np
+import xml.dom.minidom as minidom
+import xml.etree.ElementTree as ET
+
+import sys
+sys.path.append('.')
+from anylabeling.app_info import __version__
+
+
+#======================================================================= Usage ========================================================================#
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2voc  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2voc                                      #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- voc2custom  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode voc2custom                                      #
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2yolo  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo                                     #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- yolo2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode yolo2custom                                     #
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2coco  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2coco                                     #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- coco2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx.json --dst_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode coco2custom                 #                             
+#                                                                                                                                                      #
+#======================================================================= Usage ========================================================================#
+
+
+VERSION = __version__
+
+
+class BaseLabelConverter:
+    def __init__(self, classes_file):
+
+        if classes_file:
+            with open(classes_file, 'r') as f:
+                self.classes = f.read().splitlines()
+        else:
+            self.classes = []
+
+    def reset(self):
+        self.custom_data = dict(
+            version=VERSION,
+            flags={},
+            shapes=[],
+            imagePath="",
+            imageData=None,
+            imageHeight=-1,
+            imageWidth=-1
+        )
+
+    def get_image_size(self, image_file):
+        with Image.open(image_file) as img:
+            width, height = img.size
+            return width, height
+
+class RectLabelConverter(BaseLabelConverter):
+
+    def custom_to_voc2017(self, input_file, output_dir):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_path = data['imagePath']
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        root = ET.Element('annotation')
+        ET.SubElement(root, 'folder').text = os.path.dirname(output_dir)
+        ET.SubElement(root, 'filename').text = os.path.basename(image_path)
+        size = ET.SubElement(root, 'size')
+        ET.SubElement(size, 'width').text = str(image_width)
+        ET.SubElement(size, 'height').text = str(image_height)
+        ET.SubElement(size, 'depth').text = '3'
+
+        for shape in data['shapes']:
+            label = shape['label']
+            points = shape['points']
+
+            xmin = str(points[0][0])
+            ymin = str(points[0][1])
+            xmax = str(points[1][0])
+            ymax = str(points[1][1])
+
+            object_elem = ET.SubElement(root, 'object')
+            ET.SubElement(object_elem, 'name').text = label
+            ET.SubElement(object_elem, 'pose').text = 'Unspecified'
+            ET.SubElement(object_elem, 'truncated').text = '0'
+            ET.SubElement(object_elem, 'difficult').text = '0'
+            bndbox = ET.SubElement(object_elem, 'bndbox')
+            ET.SubElement(bndbox, 'xmin').text = xmin
+            ET.SubElement(bndbox, 'ymin').text = ymin
+            ET.SubElement(bndbox, 'xmax').text = xmax
+            ET.SubElement(bndbox, 'ymax').text = ymax
+
+        xml_string = ET.tostring(root, encoding='utf-8')
+        dom = minidom.parseString(xml_string)
+        formatted_xml = dom.toprettyxml(indent='  ')
+
+        with open(output_dir, 'w') as f:
+            f.write(formatted_xml)
+
+    def voc2017_to_custom(self, input_file, output_file):
+        self.reset()
+
+        tree = ET.parse(input_file)
+        root = tree.getroot()
+
+        image_path = root.find('filename').text
+        image_width = int(root.find('size/width').text)
+        image_height = int(root.find('size/height').text)
+
+        self.custom_data['imagePath'] = image_path
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        for obj in root.findall('object'):
+            label = obj.find('name').text
+            xmin = float(obj.find('bndbox/xmin').text)
+            ymin = float(obj.find('bndbox/ymin').text)
+            xmax = float(obj.find('bndbox/xmax').text)
+            ymax = float(obj.find('bndbox/ymax').text)
+
+            shape = {
+                "label": label,
+                "text": "",
+                "points": [[xmin, ymin], [xmax, ymax]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = shape['points']
+
+                class_index = self.classes.index(label)
+
+                x_center = (points[0][0] + points[1][0]) / (2 * image_width)
+                y_center = (points[0][1] + points[1][1]) / (2 * image_height)
+                width = abs(points[1][0] - points[0][0]) / image_width
+                height = abs(points[1][1] - points[0][1]) / image_height
+
+                f.write(f"{class_index} {x_center} {y_center} {width} {height}\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            x_center = float(line[1])
+            y_center = float(line[2])
+            width = float(line[3])
+            height = float(line[4])
+
+            x_min = int((x_center - width / 2) * image_width)
+            y_min = int((y_center - height / 2) * image_height)
+            x_max = int((x_center + width / 2) * image_width)
+            y_max = int((y_center + height / 2) * image_height)
+
+            label = self.classes[class_index]
+
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_coco(self, input_path, output_path):
+        coco_data = {
+            "info": {
+                "year": 2023,
+                "version": VERSION,
+                "description": "COCO Label Conversion",
+                "contributor": "CVHub",
+                "url": "https://github.com/CVHub520/X-AnyLabeling",
+                "date_created": str(date.today())
+            },
+            "licenses": [
+                {
+                    "id": 1,
+                    "url": "https://www.gnu.org/licenses/gpl-3.0.html",
+                    "name": "GNU GENERAL PUBLIC LICENSE Version 3"
+                }
+            ],
+            "categories": [],
+            "images": [],
+            "annotations": []
+        }
+
+        for i, class_name in enumerate(self.classes):
+            coco_data['categories'].append({
+                "id": i+1,
+                "name": class_name,
+                "supercategory": ""
+            })
+
+        image_id = 0
+        annotation_id = 0
+
+        file_list = os.listdir(input_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+
+            image_id += 1
+
+            input_file = os.path.join(input_path, file_name)
+            with open(input_file, 'r') as f:
+                data = json.load(f)
+
+            image_path = data['imagePath']
+            image_name = os.path.splitext(os.path.basename(image_path))[0]
+
+            coco_data['images'].append({
+                "id": image_id,
+                "file_name": image_name,
+                "width": data['imageWidth'],
+                "height": data['imageHeight'],
+                "license": 0,
+                "flickr_url": "",
+                "coco_url": "",
+                "date_captured": ""
+            })
+
+            for shape in data['shapes']:
+                annotation_id += 1
+                label = shape['label']
+                points = shape['points']
+                class_id = self.classes.index(label)
+                x_min = min(points[0][0], points[1][0])
+                y_min = min(points[0][1], points[1][1])
+                x_max = max(points[0][0], points[1][0])
+                y_max = max(points[0][1], points[1][1])
+                width = x_max - x_min
+                height = y_max - y_min
+
+                annotation = {
+                    "id": annotation_id,
+                    "image_id": image_id,
+                    "category_id": class_id+1,
+                    "bbox": [x_min, y_min, width, height],
+                    "area": width * height,
+                    "iscrowd": 0
+                }
+
+                coco_data['annotations'].append(annotation)
+
+        output_file = os.path.join(output_path, "x_anylabeling_coco.json")
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(coco_data, f, indent=4, ensure_ascii=False)
+
+    def coco_to_custom(self, input_file, output_path, image_path):
+
+        img_dic = {}
+        for file in os.listdir(image_path):
+            img_dic[file] = file
+
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+            
+        if not self.classes:
+            for cat in data["categories"]:
+                self.classes.append(cat["name"])
+
+        total_info, label_info = {}, {}
+
+        # map category_id to name
+        for dic_info in data["categories"]:
+            label_info[dic_info["id"]] = dic_info["name"]
+
+        # map image_id to info
+        for dic_info in data["images"]:
+            total_info[dic_info["id"]] = {
+                "imageWidth": dic_info["width"],
+                "imageHeight": dic_info["height"],
+                "imagePath": img_dic[dic_info["file_name"]],
+                "shapes": []
+            }
+        
+        for dic_info in data["annotations"]:
+
+            bbox = dic_info["bbox"]
+            x_min = bbox[0]
+            y_min = bbox[1]
+            width = bbox[2]
+            height = bbox[3]
+            x_max = x_min + width
+            y_max = y_min + height
+
+            shape_info = {
+                "label": self.classes[dic_info["category_id"]-1],
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            total_info[dic_info["image_id"]]["shapes"].append(shape_info)
+    
+        for dic_info in tqdm(total_info.values(), desc='Converting files', unit='file', colour='green'):
+            self.reset()
+            self.custom_data["shapes"] = dic_info["shapes"]
+            self.custom_data["imagePath"] = dic_info["imagePath"]
+            self.custom_data["imageHeight"] = dic_info["imageHeight"]
+            self.custom_data["imageWidth"] = dic_info["imageWidth"]
+
+            output_file = os.path.join(output_path, os.path.splitext(dic_info["imagePath"])[0]+".json")
+            with open(output_file, 'w', encoding='utf-8') as f:
+                json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+class PolyLabelConvert(BaseLabelConverter):
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+        image_size = np.array([[image_width, image_height]])
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = np.array(shape['points'])
+                class_index = self.classes.index(label)
+                norm_points = points / image_size
+                f.write(f"{class_index} " + " ".join([" ".join([str(cell[0]), str(cell[1])]) for cell in norm_points.tolist()]) + "\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+        image_size = np.array([image_width, image_height], np.float64)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            label = self.classes[class_index]
+            masks = line[1:]
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+            for x, y in zip(masks[0::2], masks[1::2]):
+                point = [np.float64(x), np.float64(y)]
+                point = np.array(point, np.float64) * image_size
+                shape['points'].append(point.tolist())
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+def main():
+    parser = argparse.ArgumentParser(description='Label Converter')
+    # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo
+    parser.add_argument('--task', default='polygon', choices=['rectangle', 'polygon'], help='Choose the type of task to perform')
+    parser.add_argument('--src_path', default='custom', help='Path to input directory')
+    parser.add_argument('--dst_path', default='yolo', help='Path to output directory')
+    parser.add_argument('--img_path', help='Path to image directory')
+    parser.add_argument('--classes', default='classes.txt', help='Path to classes.txt file, where each line represent a specific class')
+    parser.add_argument('--mode', default='custom2yolo', help='Choose the conversion mode what you need',
+                        choices=['custom2voc', 'voc2custom', 'custom2yolo', 'yolo2custom', 'custom2coco', 'coco2custom'])
+    args = parser.parse_args()
+    print(f"Starting conversion to {args.mode} format of {args.task}...")
+    start_time = time.time()
+
+    if args.task == 'rectangle':
+        converter = RectLabelConverter(args.classes)
+    elif args.task == 'polygon':
+        converter = PolyLabelConvert(args.classes)
+        valid_modes = ['custom2yolo', 'yolo2custom']
+        assert args.mode in valid_modes, f"Polygon tasks are only supported in {valid_modes} now!"
+
+    if args.mode == "custom2voc":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0]+'.xml')
+            converter.custom_to_voc2017(src_file, dst_file)
+    elif args.mode == "voc2custom":
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0]+'.json')
+            converter.voc2017_to_custom(src_file, dst_file)
+    elif args.mode == "custom2yolo":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0]+'.txt')
+            converter.custom_to_yolov5(src_file, dst_file)
+    elif args.mode == "yolo2custom":
+        img_dic = {}
+        for file in os.listdir(args.img_path):
+            prefix = file.split('.')[0]
+            img_dic[prefix] = file
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0]+'.json')
+            img_file = os.path.join(args.img_path, img_dic[os.path.splitext(file_name)[0]])
+            converter.yolov5_to_custom(src_file, dst_file, img_file)
+    elif args.mode == "custom2coco":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.custom_to_coco(args.src_path, args.dst_path)
+    elif args.mode == "coco2custom":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.coco_to_custom(args.src_path, args.dst_path, args.img_path)
+
+    end_time = time.time()
+    print(f"Conversion completed successfully: {args.dst_path}")
+    print(f"Conversion time: {end_time - start_time:.2f} seconds")
+
+if __name__ == '__main__':
+    main()

二进制
yolov8_study/train/labels.cache


+ 76 - 0
yolov8_study/train/split_train_val.py

@@ -0,0 +1,76 @@
+# coding:utf-8
+# brief:根据指定目录下的json配置文件,将其随机分为训练集和测试集,并将路径写到dataSet目录下的txt文件中
+ 
+import os
+import random
+import json
+
+
+# directory-主路径
+# fileType-指定文件类型
+# fileList-目标类型文件列表(路径+文件名)
+def SearchFiles(directory, fileType):
+    fileList=[]
+    for root, subDirs, files in os.walk(directory):
+        for fileName in files:
+            if fileName.endswith(fileType):
+                # json_file = open(directory + '/' + fileName, 'r', encoding='UTF-8')
+                # json_data = json.load(json_file)
+                # jsonName = json_data['imagePath']
+                # jsonName = jsonName.replace('汽车 ', 'car')
+                # jsonName = jsonName.replace('(', '_')
+                # jsonName = jsonName.replace(')', '')
+                # json_data['imagePath'] = jsonName
+                # json_file = open(directory + '/' + fileName, 'w', encoding='UTF-8')
+                # json.dump(json_data, json_file, indent=2, ensure_ascii=False)
+                # print(jsonName)
+                fileList.append(directory + '/' + fileName)
+    # for fileName in fileList:
+    #     if fileName.find('汽车 '):
+    #         newName = fileName.replace('汽车 ', 'car')
+    #         newName = newName.replace('(', '_')
+    #         newName = newName.replace(')', '')
+    #         os.rename(fileName, newName)
+    return fileList
+
+
+if __name__ == '__main__':
+    run_path = 'D:/DeepLearning/pytorch-gpu117/yolov8_study/train/'
+    txt_save_path = run_path + 'dataSet'
+    if not os.path.exists(txt_save_path):
+        os.makedirs(txt_save_path)
+
+    trainval_percent = 1
+    train_percent = 0.9
+
+    total_json = SearchFiles(run_path + 'labels', '.jpg')
+
+    print(total_json)
+
+    num = len(total_json)
+    list_index = range(num)
+    tv = int(num * trainval_percent)
+    tr = int(tv * train_percent)
+    trainval = random.sample(list_index, tv)
+    train = random.sample(trainval, tr)
+
+    file_trainval = open(txt_save_path + '/trainval.txt', 'w')
+    file_test = open(txt_save_path + '/test.txt', 'w')
+    file_train = open(txt_save_path + '/train.txt', 'w')
+    file_val = open(txt_save_path + '/val.txt', 'w')
+
+    for i in list_index:
+        name = total_json[i] + '\n'
+        if i in trainval:
+            file_trainval.write(name)
+            if i in train:
+                file_train.write(name)
+            else:
+                file_val.write(name)
+        else:
+            file_test.write(name)
+
+    file_trainval.close()
+    file_train.close()
+    file_val.close()
+    file_test.close()

+ 0 - 0
yolov8_study/train/warning.md


+ 471 - 0
yolov8_study/train1/label_converter.py

@@ -0,0 +1,471 @@
+import argparse
+import json
+import os
+import time
+
+from PIL import Image
+from tqdm import tqdm
+from datetime import date
+
+import numpy as np
+import xml.dom.minidom as minidom
+import xml.etree.ElementTree as ET
+
+import sys
+sys.path.append('.')
+from anylabeling.app_info import __version__
+
+
+#======================================================================= Usage ========================================================================#
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2voc  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2voc                                      #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- voc2custom  ---------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode voc2custom                                      #
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2yolo  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo                                     #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- yolo2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode yolo2custom                                     #
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- custom2coco  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2coco                                     #                             
+#                                                                                                                                                      #
+#-------------------------------------------------------------------- coco2custom  --------------------------------------------------------------------#
+# python tools/label_converter.py --src_path xxx.json --dst_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode coco2custom                 #                             
+#                                                                                                                                                      #
+#======================================================================= Usage ========================================================================#
+
+
+VERSION = __version__
+
+
+class BaseLabelConverter:
+    def __init__(self, classes_file):
+
+        if classes_file:
+            with open(classes_file, 'r') as f:
+                self.classes = f.read().splitlines()
+        else:
+            self.classes = []
+
+    def reset(self):
+        self.custom_data = dict(
+            version=VERSION,
+            flags={},
+            shapes=[],
+            imagePath="",
+            imageData=None,
+            imageHeight=-1,
+            imageWidth=-1
+        )
+
+    def get_image_size(self, image_file):
+        with Image.open(image_file) as img:
+            width, height = img.size
+            return width, height
+
+class RectLabelConverter(BaseLabelConverter):
+
+    def custom_to_voc2017(self, input_file, output_dir):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_path = data['imagePath']
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        root = ET.Element('annotation')
+        ET.SubElement(root, 'folder').text = os.path.dirname(output_dir)
+        ET.SubElement(root, 'filename').text = os.path.basename(image_path)
+        size = ET.SubElement(root, 'size')
+        ET.SubElement(size, 'width').text = str(image_width)
+        ET.SubElement(size, 'height').text = str(image_height)
+        ET.SubElement(size, 'depth').text = '3'
+
+        for shape in data['shapes']:
+            label = shape['label']
+            points = shape['points']
+
+            xmin = str(points[0][0])
+            ymin = str(points[0][1])
+            xmax = str(points[1][0])
+            ymax = str(points[1][1])
+
+            object_elem = ET.SubElement(root, 'object')
+            ET.SubElement(object_elem, 'name').text = label
+            ET.SubElement(object_elem, 'pose').text = 'Unspecified'
+            ET.SubElement(object_elem, 'truncated').text = '0'
+            ET.SubElement(object_elem, 'difficult').text = '0'
+            bndbox = ET.SubElement(object_elem, 'bndbox')
+            ET.SubElement(bndbox, 'xmin').text = xmin
+            ET.SubElement(bndbox, 'ymin').text = ymin
+            ET.SubElement(bndbox, 'xmax').text = xmax
+            ET.SubElement(bndbox, 'ymax').text = ymax
+
+        xml_string = ET.tostring(root, encoding='utf-8')
+        dom = minidom.parseString(xml_string)
+        formatted_xml = dom.toprettyxml(indent='  ')
+
+        with open(output_dir, 'w') as f:
+            f.write(formatted_xml)
+
+    def voc2017_to_custom(self, input_file, output_file):
+        self.reset()
+
+        tree = ET.parse(input_file)
+        root = tree.getroot()
+
+        image_path = root.find('filename').text
+        image_width = int(root.find('size/width').text)
+        image_height = int(root.find('size/height').text)
+
+        self.custom_data['imagePath'] = image_path
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        for obj in root.findall('object'):
+            label = obj.find('name').text
+            xmin = float(obj.find('bndbox/xmin').text)
+            ymin = float(obj.find('bndbox/ymin').text)
+            xmax = float(obj.find('bndbox/xmax').text)
+            ymax = float(obj.find('bndbox/ymax').text)
+
+            shape = {
+                "label": label,
+                "text": "",
+                "points": [[xmin, ymin], [xmax, ymax]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = shape['points']
+
+                class_index = self.classes.index(label)
+
+                x_center = (points[0][0] + points[1][0]) / (2 * image_width)
+                y_center = (points[0][1] + points[1][1]) / (2 * image_height)
+                width = abs(points[1][0] - points[0][0]) / image_width
+                height = abs(points[1][1] - points[0][1]) / image_height
+
+                f.write(f"{class_index} {x_center} {y_center} {width} {height}\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            x_center = float(line[1])
+            y_center = float(line[2])
+            width = float(line[3])
+            height = float(line[4])
+
+            x_min = int((x_center - width / 2) * image_width)
+            y_min = int((y_center - height / 2) * image_height)
+            x_max = int((x_center + width / 2) * image_width)
+            y_max = int((y_center + height / 2) * image_height)
+
+            label = self.classes[class_index]
+
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+    def custom_to_coco(self, input_path, output_path):
+        coco_data = {
+            "info": {
+                "year": 2023,
+                "version": VERSION,
+                "description": "COCO Label Conversion",
+                "contributor": "CVHub",
+                "url": "https://github.com/CVHub520/X-AnyLabeling",
+                "date_created": str(date.today())
+            },
+            "licenses": [
+                {
+                    "id": 1,
+                    "url": "https://www.gnu.org/licenses/gpl-3.0.html",
+                    "name": "GNU GENERAL PUBLIC LICENSE Version 3"
+                }
+            ],
+            "categories": [],
+            "images": [],
+            "annotations": []
+        }
+
+        for i, class_name in enumerate(self.classes):
+            coco_data['categories'].append({
+                "id": i+1,
+                "name": class_name,
+                "supercategory": ""
+            })
+
+        image_id = 0
+        annotation_id = 0
+
+        file_list = os.listdir(input_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+
+            image_id += 1
+
+            input_file = os.path.join(input_path, file_name)
+            with open(input_file, 'r') as f:
+                data = json.load(f)
+
+            image_path = data['imagePath']
+            image_name = os.path.splitext(os.path.basename(image_path))[0]
+
+            coco_data['images'].append({
+                "id": image_id,
+                "file_name": image_name,
+                "width": data['imageWidth'],
+                "height": data['imageHeight'],
+                "license": 0,
+                "flickr_url": "",
+                "coco_url": "",
+                "date_captured": ""
+            })
+
+            for shape in data['shapes']:
+                annotation_id += 1
+                label = shape['label']
+                points = shape['points']
+                class_id = self.classes.index(label)
+                x_min = min(points[0][0], points[1][0])
+                y_min = min(points[0][1], points[1][1])
+                x_max = max(points[0][0], points[1][0])
+                y_max = max(points[0][1], points[1][1])
+                width = x_max - x_min
+                height = y_max - y_min
+
+                annotation = {
+                    "id": annotation_id,
+                    "image_id": image_id,
+                    "category_id": class_id+1,
+                    "bbox": [x_min, y_min, width, height],
+                    "area": width * height,
+                    "iscrowd": 0
+                }
+
+                coco_data['annotations'].append(annotation)
+
+        output_file = os.path.join(output_path, "x_anylabeling_coco.json")
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(coco_data, f, indent=4, ensure_ascii=False)
+
+    def coco_to_custom(self, input_file, output_path, image_path):
+
+        img_dic = {}
+        for file in os.listdir(image_path):
+            img_dic[file] = file
+
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+            
+        if not self.classes:
+            for cat in data["categories"]:
+                self.classes.append(cat["name"])
+
+        total_info, label_info = {}, {}
+
+        # map category_id to name
+        for dic_info in data["categories"]:
+            label_info[dic_info["id"]] = dic_info["name"]
+
+        # map image_id to info
+        for dic_info in data["images"]:
+            total_info[dic_info["id"]] = {
+                "imageWidth": dic_info["width"],
+                "imageHeight": dic_info["height"],
+                "imagePath": img_dic[dic_info["file_name"]],
+                "shapes": []
+            }
+        
+        for dic_info in data["annotations"]:
+
+            bbox = dic_info["bbox"]
+            x_min = bbox[0]
+            y_min = bbox[1]
+            width = bbox[2]
+            height = bbox[3]
+            x_max = x_min + width
+            y_max = y_min + height
+
+            shape_info = {
+                "label": self.classes[dic_info["category_id"]-1],
+                "text": None,
+                "points": [[x_min, y_min], [x_max, y_max]],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+
+            total_info[dic_info["image_id"]]["shapes"].append(shape_info)
+    
+        for dic_info in tqdm(total_info.values(), desc='Converting files', unit='file', colour='green'):
+            self.reset()
+            self.custom_data["shapes"] = dic_info["shapes"]
+            self.custom_data["imagePath"] = dic_info["imagePath"]
+            self.custom_data["imageHeight"] = dic_info["imageHeight"]
+            self.custom_data["imageWidth"] = dic_info["imageWidth"]
+
+            output_file = os.path.join(output_path, os.path.splitext(dic_info["imagePath"])[0]+".json")
+            with open(output_file, 'w', encoding='utf-8') as f:
+                json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+class PolyLabelConvert(BaseLabelConverter):
+
+    def custom_to_yolov5(self, input_file, output_file):
+        with open(input_file, 'r') as f:
+            data = json.load(f)
+
+        image_width = data['imageWidth']
+        image_height = data['imageHeight']
+        image_size = np.array([[image_width, image_height]])
+
+        with open(output_file, 'w') as f:
+            for shape in data['shapes']:
+                label = shape['label']
+                points = np.array(shape['points'])
+                class_index = self.classes.index(label)
+                norm_points = points / image_size
+                f.write(f"{class_index} " + " ".join([" ".join([str(cell[0]), str(cell[1])]) for cell in norm_points.tolist()]) + "\n")
+
+    def yolov5_to_custom(self, input_file, output_file, image_file):
+        self.reset()
+
+        with open(input_file, 'r') as f:
+            lines = f.readlines()
+
+        image_width, image_height = self.get_image_size(image_file)
+        image_size = np.array([image_width, image_height], np.float64)
+
+        for line in lines:
+            line = line.strip().split(' ')
+            class_index = int(line[0])
+            label = self.classes[class_index]
+            masks = line[1:]
+            shape = {
+                "label": label,
+                "text": None,
+                "points": [],
+                "group_id": None,
+                "shape_type": "rectangle",
+                "flags": {}
+            }
+            for x, y in zip(masks[0::2], masks[1::2]):
+                point = [np.float64(x), np.float64(y)]
+                point = np.array(point, np.float64) * image_size
+                shape['points'].append(point.tolist())
+            self.custom_data['shapes'].append(shape)
+
+        self.custom_data['imagePath'] = os.path.basename(image_file)
+        self.custom_data['imageHeight'] = image_height
+        self.custom_data['imageWidth'] = image_width
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
+
+def main():
+    parser = argparse.ArgumentParser(description='Label Converter')
+    # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo
+    parser.add_argument('--task', default='rectangle', choices=['rectangle', 'polygon'], help='Choose the type of task to perform')
+    parser.add_argument('--src_path', default='custom', help='Path to input directory')
+    parser.add_argument('--dst_path', default='yolo', help='Path to output directory')
+    parser.add_argument('--img_path', help='Path to image directory')
+    parser.add_argument('--classes', default='classes.txt', help='Path to classes.txt file, where each line represent a specific class')
+    parser.add_argument('--mode', default='custom2yolo', help='Choose the conversion mode what you need',
+                        choices=['custom2voc', 'voc2custom', 'custom2yolo', 'yolo2custom', 'custom2coco', 'coco2custom'])
+    args = parser.parse_args()
+    print(f"Starting conversion to {args.mode} format of {args.task}...")
+    start_time = time.time()
+
+    if args.task == 'rectangle':
+        converter = RectLabelConverter(args.classes)
+    elif args.task == 'polygon':
+        converter = PolyLabelConvert(args.classes)
+        valid_modes = ['custom2yolo', 'yolo2custom']
+        assert args.mode in valid_modes, f"Polygon tasks are only supported in {valid_modes} now!"
+
+    if args.mode == "custom2voc":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0]+'.xml')
+            converter.custom_to_voc2017(src_file, dst_file)
+    elif args.mode == "voc2custom":
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0]+'.json')
+            converter.voc2017_to_custom(src_file, dst_file)
+    elif args.mode == "custom2yolo":
+        file_list = os.listdir(args.src_path)
+        os.makedirs(args.dst_path, exist_ok=True)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0]+'.txt')
+            converter.custom_to_yolov5(src_file, dst_file)
+    elif args.mode == "yolo2custom":
+        img_dic = {}
+        for file in os.listdir(args.img_path):
+            prefix = file.split('.')[0]
+            img_dic[prefix] = file
+        file_list = os.listdir(args.src_path)
+        for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
+            src_file = os.path.join(args.src_path, file_name)
+            dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0]+'.json')
+            img_file = os.path.join(args.img_path, img_dic[os.path.splitext(file_name)[0]])
+            converter.yolov5_to_custom(src_file, dst_file, img_file)
+    elif args.mode == "custom2coco":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.custom_to_coco(args.src_path, args.dst_path)
+    elif args.mode == "coco2custom":
+        os.makedirs(args.dst_path, exist_ok=True)
+        converter.coco_to_custom(args.src_path, args.dst_path, args.img_path)
+
+    end_time = time.time()
+    print(f"Conversion completed successfully: {args.dst_path}")
+    print(f"Conversion time: {end_time - start_time:.2f} seconds")
+
+if __name__ == '__main__':
+    main()

+ 46 - 0
yolov8_study/yolov8-seg.yaml

@@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 2  # number of classes (same count as the two names, car and wheel, in tof3D/data.yaml)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]  # 2
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]  # 4
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]  # 6
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]  # 8
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head (layer indices continue from the backbone)
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 10
+  - [[-1, 6], 1, Concat, [1]]  # 11: cat backbone P4 (layer 6)
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 13
+  - [[-1, 4], 1, Concat, [1]]  # 14: cat backbone P3 (layer 4)
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]  # 16
+  - [[-1, 12], 1, Concat, [1]]  # 17: cat head P4 (layer 12)
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]  # 19
+  - [[-1, 9], 1, Concat, [1]]  # 20: cat head P5 (layer 9)
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # 22: Segment(P3, P4, P5)

+ 46 - 0
yolov8_study/yolov8.yaml

@@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 2  # number of classes (same count as the two names, car and wheel, in tof3D/data.yaml)
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768]   # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs 1.3G
+  l: [1.00, 1.00, 512]   # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512]   # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]  # 2
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]  # 4
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]  # 6
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]  # 8
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head (layer indices continue from the backbone)
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 10
+  - [[-1, 6], 1, Concat, [1]]  # 11: cat backbone P4 (layer 6)
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 13
+  - [[-1, 4], 1, Concat, [1]]  # 14: cat backbone P3 (layer 4)
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]  # 16
+  - [[-1, 12], 1, Concat, [1]]  # 17: cat head P4 (layer 12)
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]  # 19
+  - [[-1, 9], 1, Concat, [1]]  # 20: cat head P5 (layer 9)
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Detect, [nc]]  # 22: Detect(P3, P4, P5)