label_converter.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. import argparse
  2. import json
  3. import os
  4. import time
  5. from PIL import Image
  6. from tqdm import tqdm
  7. from datetime import date
  8. import numpy as np
  9. import xml.dom.minidom as minidom
  10. import xml.etree.ElementTree as ET
  11. import sys
  12. sys.path.append('.')
  13. from anylabeling.app_info import __version__
  14. # ======================================================================= Usage ========================================================================#
  15. # #
  16. # -------------------------------------------------------------------- custom2voc ---------------------------------------------------------------------#
  17. # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2voc #
  18. # #
  19. # -------------------------------------------------------------------- voc2custom ---------------------------------------------------------------------#
  20. # python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode voc2custom #
  21. # #
  22. # -------------------------------------------------------------------- custom2yolo --------------------------------------------------------------------#
  23. # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo #
  24. # #
  25. # -------------------------------------------------------------------- yolo2custom --------------------------------------------------------------------#
  26. # python tools/label_converter.py --src_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode yolo2custom #
  27. # #
  28. # -------------------------------------------------------------------- custom2coco --------------------------------------------------------------------#
  29. # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2coco #
  30. # #
  31. # -------------------------------------------------------------------- coco2custom --------------------------------------------------------------------#
  32. # python tools/label_converter.py --src_path xxx.json --dst_path xxx_folder --img_path xxx_folder --classes xxx.txt --mode coco2custom #
  33. # #
  34. # ======================================================================= Usage ========================================================================#
  35. VERSION = __version__
  36. class BaseLabelConverter:
  37. def __init__(self, classes_file):
  38. if classes_file:
  39. with open(classes_file, 'r') as f:
  40. self.classes = f.read().splitlines()
  41. else:
  42. self.classes = []
  43. def reset(self):
  44. self.custom_data = dict(
  45. version=VERSION,
  46. flags={},
  47. shapes=[],
  48. imagePath="",
  49. imageData=None,
  50. imageHeight=-1,
  51. imageWidth=-1
  52. )
  53. def get_image_size(self, image_file):
  54. with Image.open(image_file) as img:
  55. width, height = img.size
  56. return width, height
  57. class RectLabelConverter(BaseLabelConverter):
  58. def custom_to_voc2017(self, input_file, output_dir):
  59. with open(input_file, 'r') as f:
  60. data = json.load(f)
  61. image_path = data['imagePath']
  62. image_width = data['imageWidth']
  63. image_height = data['imageHeight']
  64. root = ET.Element('annotation')
  65. ET.SubElement(root, 'folder').text = os.path.dirname(output_dir)
  66. ET.SubElement(root, 'filename').text = os.path.basename(image_path)
  67. size = ET.SubElement(root, 'size')
  68. ET.SubElement(size, 'width').text = str(image_width)
  69. ET.SubElement(size, 'height').text = str(image_height)
  70. ET.SubElement(size, 'depth').text = '3'
  71. for shape in data['shapes']:
  72. label = shape['label']
  73. points = shape['points']
  74. xmin = str(points[0][0])
  75. ymin = str(points[0][1])
  76. xmax = str(points[1][0])
  77. ymax = str(points[1][1])
  78. object_elem = ET.SubElement(root, 'object')
  79. ET.SubElement(object_elem, 'name').text = label
  80. ET.SubElement(object_elem, 'pose').text = 'Unspecified'
  81. ET.SubElement(object_elem, 'truncated').text = '0'
  82. ET.SubElement(object_elem, 'difficult').text = '0'
  83. bndbox = ET.SubElement(object_elem, 'bndbox')
  84. ET.SubElement(bndbox, 'xmin').text = xmin
  85. ET.SubElement(bndbox, 'ymin').text = ymin
  86. ET.SubElement(bndbox, 'xmax').text = xmax
  87. ET.SubElement(bndbox, 'ymax').text = ymax
  88. xml_string = ET.tostring(root, encoding='utf-8')
  89. dom = minidom.parseString(xml_string)
  90. formatted_xml = dom.toprettyxml(indent=' ')
  91. with open(output_dir, 'w') as f:
  92. f.write(formatted_xml)
  93. def voc2017_to_custom(self, input_file, output_file):
  94. self.reset()
  95. tree = ET.parse(input_file)
  96. root = tree.getroot()
  97. image_path = root.find('filename').text
  98. image_width = int(root.find('size/width').text)
  99. image_height = int(root.find('size/height').text)
  100. self.custom_data['imagePath'] = image_path
  101. self.custom_data['imageHeight'] = image_height
  102. self.custom_data['imageWidth'] = image_width
  103. for obj in root.findall('object'):
  104. label = obj.find('name').text
  105. xmin = float(obj.find('bndbox/xmin').text)
  106. ymin = float(obj.find('bndbox/ymin').text)
  107. xmax = float(obj.find('bndbox/xmax').text)
  108. ymax = float(obj.find('bndbox/ymax').text)
  109. shape = {
  110. "label": label,
  111. "text": "",
  112. "points": [[xmin, ymin], [xmax, ymax]],
  113. "group_id": None,
  114. "shape_type": "rectangle",
  115. "flags": {}
  116. }
  117. self.custom_data['shapes'].append(shape)
  118. with open(output_file, 'w', encoding='utf-8') as f:
  119. json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
  120. def custom_to_yolov5(self, input_file, output_file):
  121. with open(input_file, 'r') as f:
  122. data = json.load(f)
  123. image_width = data['imageWidth']
  124. image_height = data['imageHeight']
  125. with open(output_file, 'w') as f:
  126. for shape in data['shapes']:
  127. label = shape['label']
  128. points = shape['points']
  129. class_index = self.classes.index(label)
  130. x_center = (points[0][0] + points[1][0]) / (2 * image_width)
  131. y_center = (points[0][1] + points[1][1]) / (2 * image_height)
  132. width = abs(points[1][0] - points[0][0]) / image_width
  133. height = abs(points[1][1] - points[0][1]) / image_height
  134. f.write(f"{class_index} {x_center} {y_center} {width} {height}\n")
  135. def yolov5_to_custom(self, input_file, output_file, image_file):
  136. self.reset()
  137. with open(input_file, 'r') as f:
  138. lines = f.readlines()
  139. image_width, image_height = self.get_image_size(image_file)
  140. for line in lines:
  141. line = line.strip().split(' ')
  142. class_index = int(line[0])
  143. x_center = float(line[1])
  144. y_center = float(line[2])
  145. width = float(line[3])
  146. height = float(line[4])
  147. x_min = int((x_center - width / 2) * image_width)
  148. y_min = int((y_center - height / 2) * image_height)
  149. x_max = int((x_center + width / 2) * image_width)
  150. y_max = int((y_center + height / 2) * image_height)
  151. label = self.classes[class_index]
  152. shape = {
  153. "label": label,
  154. "text": None,
  155. "points": [[x_min, y_min], [x_max, y_max]],
  156. "group_id": None,
  157. "shape_type": "rectangle",
  158. "flags": {}
  159. }
  160. self.custom_data['shapes'].append(shape)
  161. self.custom_data['imagePath'] = os.path.basename(image_file)
  162. self.custom_data['imageHeight'] = image_height
  163. self.custom_data['imageWidth'] = image_width
  164. with open(output_file, 'w', encoding='utf-8') as f:
  165. json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
  166. def custom_to_coco(self, input_path, output_path):
  167. coco_data = {
  168. "info": {
  169. "year": 2023,
  170. "version": VERSION,
  171. "description": "COCO Label Conversion",
  172. "contributor": "CVHub",
  173. "url": "https://github.com/CVHub520/X-AnyLabeling",
  174. "date_created": str(date.today())
  175. },
  176. "licenses": [
  177. {
  178. "id": 1,
  179. "url": "https://www.gnu.org/licenses/gpl-3.0.html",
  180. "name": "GNU GENERAL PUBLIC LICENSE Version 3"
  181. }
  182. ],
  183. "categories": [],
  184. "images": [],
  185. "annotations": []
  186. }
  187. for i, class_name in enumerate(self.classes):
  188. coco_data['categories'].append({
  189. "id": i + 1,
  190. "name": class_name,
  191. "supercategory": ""
  192. })
  193. image_id = 0
  194. annotation_id = 0
  195. file_list = os.listdir(input_path)
  196. for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
  197. image_id += 1
  198. input_file = os.path.join(input_path, file_name)
  199. with open(input_file, 'r') as f:
  200. data = json.load(f)
  201. image_path = data['imagePath']
  202. image_name = os.path.splitext(os.path.basename(image_path))[0]
  203. coco_data['images'].append({
  204. "id": image_id,
  205. "file_name": image_name,
  206. "width": data['imageWidth'],
  207. "height": data['imageHeight'],
  208. "license": 0,
  209. "flickr_url": "",
  210. "coco_url": "",
  211. "date_captured": ""
  212. })
  213. for shape in data['shapes']:
  214. annotation_id += 1
  215. label = shape['label']
  216. points = shape['points']
  217. class_id = self.classes.index(label)
  218. x_min = min(points[0][0], points[1][0])
  219. y_min = min(points[0][1], points[1][1])
  220. x_max = max(points[0][0], points[1][0])
  221. y_max = max(points[0][1], points[1][1])
  222. width = x_max - x_min
  223. height = y_max - y_min
  224. annotation = {
  225. "id": annotation_id,
  226. "image_id": image_id,
  227. "category_id": class_id + 1,
  228. "bbox": [x_min, y_min, width, height],
  229. "area": width * height,
  230. "iscrowd": 0
  231. }
  232. coco_data['annotations'].append(annotation)
  233. output_file = os.path.join(output_path, "x_anylabeling_coco.json")
  234. with open(output_file, 'w', encoding='utf-8') as f:
  235. json.dump(coco_data, f, indent=4, ensure_ascii=False)
  236. def coco_to_custom(self, input_file, output_path, image_path):
  237. img_dic = {}
  238. for file in os.listdir(image_path):
  239. img_dic[file] = file
  240. with open(input_file, 'r', encoding='utf-8') as f:
  241. data = json.load(f)
  242. if not self.classes:
  243. for cat in data["categories"]:
  244. self.classes.append(cat["name"])
  245. total_info, label_info = {}, {}
  246. # map category_id to name
  247. for dic_info in data["categories"]:
  248. label_info[dic_info["id"]] = dic_info["name"]
  249. # map image_id to info
  250. for dic_info in data["images"]:
  251. total_info[dic_info["id"]] = {
  252. "imageWidth": dic_info["width"],
  253. "imageHeight": dic_info["height"],
  254. "imagePath": img_dic[dic_info["file_name"]],
  255. "shapes": []
  256. }
  257. for dic_info in data["annotations"]:
  258. bbox = dic_info["bbox"]
  259. x_min = bbox[0]
  260. y_min = bbox[1]
  261. width = bbox[2]
  262. height = bbox[3]
  263. x_max = x_min + width
  264. y_max = y_min + height
  265. shape_info = {
  266. "label": self.classes[dic_info["category_id"] - 1],
  267. "text": None,
  268. "points": [[x_min, y_min], [x_max, y_max]],
  269. "group_id": None,
  270. "shape_type": "rectangle",
  271. "flags": {}
  272. }
  273. total_info[dic_info["image_id"]]["shapes"].append(shape_info)
  274. for dic_info in tqdm(total_info.values(), desc='Converting files', unit='file', colour='green'):
  275. self.reset()
  276. self.custom_data["shapes"] = dic_info["shapes"]
  277. self.custom_data["imagePath"] = dic_info["imagePath"]
  278. self.custom_data["imageHeight"] = dic_info["imageHeight"]
  279. self.custom_data["imageWidth"] = dic_info["imageWidth"]
  280. output_file = os.path.join(output_path, os.path.splitext(dic_info["imagePath"])[0] + ".json")
  281. with open(output_file, 'w', encoding='utf-8') as f:
  282. json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
  283. class PolyLabelConvert(BaseLabelConverter):
  284. def custom_to_yolov5(self, input_file, output_file):
  285. with open(input_file, 'r') as f:
  286. data = json.load(f)
  287. image_width = data['imageWidth']
  288. image_height = data['imageHeight']
  289. image_size = np.array([[image_width, image_height]])
  290. with open(output_file, 'w') as f:
  291. for shape in data['shapes']:
  292. label = shape['label']
  293. points = np.array(shape['points'])
  294. class_index = self.classes.index(label)
  295. norm_points = points / image_size
  296. f.write(f"{class_index} " + " ".join(
  297. [" ".join([str(cell[0]), str(cell[1])]) for cell in norm_points.tolist()]) + "\n")
  298. def yolov5_to_custom(self, input_file, output_file, image_file):
  299. self.reset()
  300. with open(input_file, 'r') as f:
  301. lines = f.readlines()
  302. image_width, image_height = self.get_image_size(image_file)
  303. image_size = np.array([image_width, image_height], np.float64)
  304. for line in lines:
  305. line = line.strip().split(' ')
  306. class_index = int(line[0])
  307. label = self.classes[class_index]
  308. masks = line[1:]
  309. shape = {
  310. "label": label,
  311. "text": None,
  312. "points": [],
  313. "group_id": None,
  314. "shape_type": "rectangle",
  315. "flags": {}
  316. }
  317. for x, y in zip(masks[0::2], masks[1::2]):
  318. point = [np.float64(x), np.float64(y)]
  319. point = np.array(point, np.float64) * image_size
  320. shape['points'].append(point.tolist())
  321. self.custom_data['shapes'].append(shape)
  322. self.custom_data['imagePath'] = os.path.basename(image_file)
  323. self.custom_data['imageHeight'] = image_height
  324. self.custom_data['imageWidth'] = image_width
  325. with open(output_file, 'w', encoding='utf-8') as f:
  326. json.dump(self.custom_data, f, indent=2, ensure_ascii=False)
  327. def main():
  328. parser = argparse.ArgumentParser(description='Label Converter')
  329. # python tools/label_converter.py --src_path xxx_folder --dst_path xxx_folder --classes xxx.txt --mode custom2yolo
  330. parser.add_argument('--task', default='polygon', choices=['rectangle', 'polygon'],
  331. help='Choose the type of task to perform')
  332. parser.add_argument('--src_path', default='custom', help='Path to input directory')
  333. parser.add_argument('--dst_path', default='yolo', help='Path to output directory')
  334. parser.add_argument('--img_path', help='Path to image directory')
  335. parser.add_argument('--classes', default='classes.txt',
  336. help='Path to classes.txt file, where each line represent a specific class')
  337. parser.add_argument('--mode', default='custom2yolo', help='Choose the conversion mode what you need',
  338. choices=['custom2voc', 'voc2custom', 'custom2yolo', 'yolo2custom', 'custom2coco',
  339. 'coco2custom'])
  340. args = parser.parse_args()
  341. print(f"Starting conversion to {args.mode} format of {args.task}...")
  342. start_time = time.time()
  343. if args.task == 'rectangle':
  344. converter = RectLabelConverter(args.classes)
  345. elif args.task == 'polygon':
  346. converter = PolyLabelConvert(args.classes)
  347. valid_modes = ['custom2yolo', 'yolo2custom']
  348. assert args.mode in valid_modes, f"Polygon tasks are only supported in {valid_modes} now!"
  349. if args.mode == "custom2voc":
  350. file_list = os.listdir(args.src_path)
  351. os.makedirs(args.dst_path, exist_ok=True)
  352. for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
  353. src_file = os.path.join(args.src_path, file_name)
  354. dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0] + '.xml')
  355. converter.custom_to_voc2017(src_file, dst_file)
  356. elif args.mode == "voc2custom":
  357. file_list = os.listdir(args.src_path)
  358. for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
  359. src_file = os.path.join(args.src_path, file_name)
  360. dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0] + '.json')
  361. converter.voc2017_to_custom(src_file, dst_file)
  362. elif args.mode == "custom2yolo":
  363. file_list = os.listdir(args.src_path)
  364. os.makedirs(args.dst_path, exist_ok=True)
  365. for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
  366. src_file = os.path.join(args.src_path, file_name)
  367. dst_file = os.path.join(args.dst_path, os.path.splitext(file_name)[0] + '.txt')
  368. converter.custom_to_yolov5(src_file, dst_file)
  369. elif args.mode == "yolo2custom":
  370. img_dic = {}
  371. for file in os.listdir(args.img_path):
  372. prefix = file.split('.')[0]
  373. img_dic[prefix] = file
  374. file_list = os.listdir(args.src_path)
  375. for file_name in tqdm(file_list, desc='Converting files', unit='file', colour='green'):
  376. src_file = os.path.join(args.src_path, file_name)
  377. dst_file = os.path.join(args.img_path, os.path.splitext(file_name)[0] + '.json')
  378. img_file = os.path.join(args.img_path, img_dic[os.path.splitext(file_name)[0]])
  379. converter.yolov5_to_custom(src_file, dst_file, img_file)
  380. elif args.mode == "custom2coco":
  381. os.makedirs(args.dst_path, exist_ok=True)
  382. converter.custom_to_coco(args.src_path, args.dst_path)
  383. elif args.mode == "coco2custom":
  384. os.makedirs(args.dst_path, exist_ok=True)
  385. converter.coco_to_custom(args.src_path, args.dst_path, args.img_path)
  386. end_time = time.time()
  387. print(f"Conversion completed successfully: {args.dst_path}")
  388. print(f"Conversion time: {end_time - start_time:.2f} seconds")
  389. if __name__ == '__main__':
  390. main()