From 05debad2f028620cb4efad00910bf6fa78c2b9e6 Mon Sep 17 00:00:00 2001
From: SWHL
Date: Tue, 31 Oct 2023 22:27:26 +0800
Subject: [PATCH] Add labelme to coco

---
 README.md                        |   1 +
 .../val_0001.jpg                 | Bin
 .../val_0001.json                |   0
 .../val_0002.jpg                 | Bin
 .../val_0002.json                |   0
 demo.py                          |   9 -
 label_convert/labelme_to_coco.py | 242 ++++++------
 requirements.txt                 |   3 +-
 setup.py                         |   7 +-
 9 files changed, 84 insertions(+), 178 deletions(-)
 rename dataset/{labelme_format => labelme_dataset}/val_0001.jpg (100%)
 rename dataset/{labelme_format => labelme_dataset}/val_0001.json (100%)
 rename dataset/{labelme_format => labelme_dataset}/val_0002.jpg (100%)
 rename dataset/{labelme_format => labelme_dataset}/val_0002.json (100%)
 delete mode 100644 demo.py

diff --git a/README.md b/README.md
index 2ccf09b..adef1ba 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ D(COCO) --> A
 B --> D
 E(YOLOv5 YAML) --> D
 F(darknet) --> D
+G(labelme) --> D
 ```
 
 ## Installation
diff --git a/dataset/labelme_format/val_0001.jpg b/dataset/labelme_dataset/val_0001.jpg
similarity index 100%
rename from dataset/labelme_format/val_0001.jpg
rename to dataset/labelme_dataset/val_0001.jpg
diff --git a/dataset/labelme_format/val_0001.json b/dataset/labelme_dataset/val_0001.json
similarity index 100%
rename from dataset/labelme_format/val_0001.json
rename to dataset/labelme_dataset/val_0001.json
diff --git a/dataset/labelme_format/val_0002.jpg b/dataset/labelme_dataset/val_0002.jpg
similarity index 100%
rename from dataset/labelme_format/val_0002.jpg
rename to dataset/labelme_dataset/val_0002.jpg
diff --git a/dataset/labelme_format/val_0002.json b/dataset/labelme_dataset/val_0002.json
similarity index 100%
rename from dataset/labelme_format/val_0002.json
rename to dataset/labelme_dataset/val_0002.json
diff --git a/demo.py b/demo.py
deleted file mode 100644
index 3d82f79..0000000
--- a/demo.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from label_convert.labelme_to_coco import LabelmeToCOCO
-
-data_dir = "dataset/labelme_format"
-out_dir = "output"
-convert = LabelmeToCOCO(
-    data_dir=data_dir, out_dir=out_dir, val_ratio=0.1, have_test=True, test_ratio=0.1
-)
-
-convert()
diff --git a/label_convert/labelme_to_coco.py b/label_convert/labelme_to_coco.py
index 0cbc4b0..18e118b 100644
--- a/label_convert/labelme_to_coco.py
+++ b/label_convert/labelme_to_coco.py
@@ -6,11 +6,10 @@
 import random
 import shutil
 import time
-import warnings
 from pathlib import Path
 from typing import List, Optional, Union
 
-import cv2
+import numpy as np
 from tqdm import tqdm
 
 
@@ -33,7 +32,8 @@ def __init__(
         if out_dir is None:
             save_dir_name = f"{Path(self.raw_data_dir).name}_COCO_format"
             self.output_dir = self.raw_data_dir.parent / save_dir_name
-        self.output_dir = Path(out_dir)
+        else:
+            self.output_dir = Path(out_dir)
         self.mkdir(self.output_dir)
 
         self.anno_dir = self.output_dir / "annotations"
@@ -52,7 +52,14 @@ def __init__(
 
         self.cur_year = time.strftime("%Y", time.localtime(time.time()))
 
-    def __call__(self, mode_list: List[str] = None):
+        self.cls_to_idx = {}
+        self.object_id = 1
+
+        self.categories = self._get_category()
+
+    def __call__(
+        self,
+    ):
         img_list = self.get_img_list()
         if not img_list:
             raise ValueError(f"{self.raw_data_dir} is empty!")
@@ -66,48 +73,15 @@ def __call__(
         )
         train_list, val_list, test_list = split_list
 
-        # Iterate over all JSON files to collect every category field
-        # TODO
-
-        anno = self._init_json()
-        for i, img_path in enumerate(train_list):
-            img_id = i + 1
+        train_anno = self.generate_json(train_list, self.train_dir)
+        self.write_json(self.anno_dir / "instances_train2017.json", train_anno)
 
-            new_img_name = f"{img_id:012d}.jpg"
-            new_img_path = self.train_dir / new_img_name
+        val_anno = self.generate_json(val_list, self.val_dir)
+        self.write_json(self.anno_dir / "instances_val2017.json", val_anno)
 
-            # Copy the image to the target directory
-            self.cp_file(img_path, new_img_path)
-
-            raw_json_path = img_path.with_suffix(".json")
-            raw_json_data = self.read_json(raw_json_path)
-
-            # Write into the JSON
-            img_info = {
-                "date_captured": str(self.cur_year),
-                "file_name": new_img_name,
-                "id": img_id,
-                "height": raw_json_data.get("imageHeight"),
-                "width": raw_json_data.get("imageWidth"),
-            }
-
-            # Record the category
-
-            print("ok")
-
-        for mode in mode_list:
-            # Create the directory of saving the new image.
-            save_img_dir = self.output_dir / f"{mode}2017"
-            self.mkdir(save_img_dir)
-
-            # Generate json file.
-            anno_dir = self.output_dir / "annotations"
-            self.mkdir(anno_dir)
-
-            save_json_path = anno_dir / f"instances_{mode}2017.json"
-            json_data = self.convert(img_list, save_img_dir, mode)
-
-            self.write_json(save_json_path, json_data)
+        if test_list:
+            test_anno = self.generate_json(test_list, self.test_dir)
+            self.write_json(self.anno_dir / "instances_test2017.json", test_anno)
 
         print(f"Successfully convert, detail in {self.output_dir}")
 
     def get_img_list(self):
@@ -119,7 +93,7 @@ def gen_image_label_dir(self, img_list):
         new_image_list = []
         for img_path in tqdm(img_list):
             right_label_path = img_path.with_name(f"{img_path.stem}.json")
-            if right_label_path.exists() and self.read_txt(str(right_label_path)):
+            if right_label_path.exists() and self.read_json(str(right_label_path)):
                 new_image_list.append(img_path)
         return new_image_list
 
@@ -161,141 +135,81 @@ def _init_json(self):
                     "url": "https://github.com/RapidAI/LabelConvert/LICENSE",
                 }
             ],
-            "categories": [],
+            "categories": self.categories,
         }
         return annotation_info
 
     def _get_category(
         self,
     ):
-        # Get this while scanning all of the JSON files
-        class_list = self.read_txt(classes_path)
-        categories = []
-        for i, category in enumerate(class_list, 1):
-            categories.append(
+        json_list = Path(self.raw_data_dir).glob("*.json")
+        all_categories = []
+        for json_path in json_list:
+            json_info = self.read_json(json_path)
+            shapes = json_info.get("shapes", [])
+            all_categories.extend([v["label"] for v in shapes])
+
+        categories = list(set(all_categories))
+        categories.sort(key=all_categories.index)
+
+        coco_categories = []
+        for i, cls_name in enumerate(categories):
+            coco_categories.append(
                 {
-                    "supercategory": category,
-                    "id": i,
-                    "name": category,
+                    "supercategory": cls_name,
+                    "id": i + 1,
+                    "name": cls_name,
                 }
             )
-        return categories
-
-    def convert(self, img_list, save_img_dir, mode):
-        images, annotations = [], []
-        for img_id, img_path in enumerate(tqdm(img_list, desc=mode), 1):
-            image_dict = self.get_image_info(img_path, img_id, save_img_dir)
-            images.append(image_dict)
+        self.cls_to_idx = {v: i + 1 for i, v in enumerate(categories)}
+        return coco_categories
 
-            label_path = self.raw_data_dir / "labels" / f"{Path(img_path).stem}.txt"
-            annotation = self.get_annotation(
-                label_path, img_id, image_dict["height"], image_dict["width"]
-            )
-            annotations.extend(annotation)
-
-        json_data = {
-            "info": self.info,
-            "images": images,
-            "licenses": self.licenses,
-            "type": self.type,
-            "annotations": annotations,
-            "categories": self.categories,
-        }
-        return json_data
-
-    def get_image_info(self, img_path, img_id, save_img_dir):
-        img_path = Path(img_path)
-        if self.raw_data_dir.as_posix() not in img_path.as_posix():
-            # relative path (relative to the raw_data_dir)
-            # e.g. images/images(3).jpg
-            img_path = self.raw_data_dir / img_path
-
-        self.verify_exists(img_path)
-
-        new_img_name = f"{img_id:012d}.jpg"
-        save_img_path = save_img_dir / new_img_name
-        img_src = cv2.imread(str(img_path))
-        if img_path.suffix.lower() == ".jpg":
-            shutil.copyfile(img_path, save_img_path)
-        else:
-            cv2.imwrite(str(save_img_path), img_src)
-
-        height, width = img_src.shape[:2]
-        image_info = {
-            "date_captured": self.cur_year,
-            "file_name": new_img_name,
-            "id": img_id,
-            "height": height,
-            "width": width,
-        }
-        return image_info
-
-    def get_annotation(self, label_path: Path, img_id, height, width):
-        def get_box_info(vertex_info, height, width):
-            cx, cy, w, h = [float(i) for i in vertex_info]
-
-            cx = cx * width
-            cy = cy * height
-            box_w = w * width
-            box_h = h * height
-
-            # left top
-            x0 = max(cx - box_w / 2, 0)
-            y0 = max(cy - box_h / 2, 0)
+    def generate_json(self, img_list, save_dir):
+        anno = self._init_json()
+        for i, img_path in enumerate(img_list):
+            img_id = i + 1
 
-            # right bottom
-            x1 = min(x0 + box_w, width)
-            y1 = min(y0 + box_h, height)
+            new_img_name = f"{img_id:012d}{Path(img_path).suffix}"
+            new_img_path = save_dir / new_img_name
+            self.cp_file(img_path, new_img_path)
 
-            segmentation = [[x0, y0, x1, y0, x1, y1, x0, y1]]
-            bbox = [x0, y0, box_w, box_h]
-            area = box_w * box_h
-            return segmentation, bbox, area
+            raw_json_path = img_path.with_suffix(".json")
+            raw_json_data = self.read_json(raw_json_path)
 
-        if not label_path.exists():
-            annotation = [
-                {
-                    "segmentation": [],
-                    "area": 0,
-                    "iscrowd": 0,
-                    "image_id": img_id,
-                    "bbox": [],
-                    "category_id": -1,
-                    "id": self.annotation_id,
-                }
-            ]
-            self.annotation_id += 1
-            return annotation
-
-        annotation = []
-        label_list = self.read_txt(str(label_path))
-        for i, one_line in enumerate(label_list):
-            label_info = one_line.split(" ")
-            if len(label_info) < 5:
-                warnings.warn(f"The {i+1} line of the {label_path} has been corrupted.")
-                continue
-
-            category_id, vertex_info = label_info[0], label_info[1:]
-            segmentation, bbox, area = get_box_info(vertex_info, height, width)
-            annotation.append(
-                {
-                    "segmentation": segmentation,
+            img_info = {
+                "date_captured": str(self.cur_year),
+                "file_name": new_img_name,
+                "id": img_id,
+                "height": raw_json_data.get("imageHeight"),
+                "width": raw_json_data.get("imageWidth"),
+            }
+            anno["images"].append(img_info)
+
+            shapes = raw_json_data.get("shapes", [])
+            anno_list = []
+            for shape in shapes:
+                label_name = shape.get("label")
+                label_id = self.cls_to_idx[label_name]
+
+                points = np.array(shape.get("points"))
+                x0, y0 = np.min(points, axis=0)
+                x1, y1 = np.max(points, axis=0)
+                area = (x1 - x0) * (y1 - y0)
+
+                seg_points = [np.ravel(points, order="C").tolist()]
+                one_anno_dict = {
+                    "segmentation": seg_points,
                     "area": area,
                     "iscrowd": 0,
                     "image_id": img_id,
-                    "bbox": bbox,
-                    "category_id": int(category_id) + 1,
-                    "id": self.annotation_id,
+                    "bbox": [x0, y0, x1, y1],
+                    "category_id": label_id,
+                    "id": self.object_id,
                 }
-            )
-            self.annotation_id += 1
-        return annotation
-
-    @staticmethod
-    def read_txt(txt_path):
-        with open(str(txt_path), "r", encoding="utf-8") as f:
-            data = list(map(lambda x: x.rstrip("\n"), f))
-        return data
+                anno_list.append(one_anno_dict)
+                self.object_id += 1
+            anno["annotations"].extend(anno_list)
+        return anno
 
     @staticmethod
     def read_json(json_path: Union[str, Path]):
@@ -313,7 +227,7 @@ def verify_exists(file_path: Union[Path, str]):
             raise FileNotFoundError(f"The {file_path} is not exists!!!")
 
     @staticmethod
-    def write_json(json_path, content: dict):
+    def write_json(json_path: Union[str, Path], content: dict):
         with open(json_path, "w", encoding="utf-8") as f:
             json.dump(content, f, ensure_ascii=False)
 
diff --git a/requirements.txt b/requirements.txt
index 8c9febe..619f011 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 opencv_python
-tqdm
\ No newline at end of file
+tqdm
+numpy
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a291f9f..3025aed 100644
--- a/setup.py
+++ b/setup.py
@@ -44,9 +44,7 @@ def get_readme() -> str:
     VERSION_NUM = matched_versions
     sys.argv = sys.argv[:2]
 
-project_urls = {
-    'Documentation': 'https://rapidai.github.io/LabelConvert/docs'
-}
+project_urls = {"Documentation": "https://rapidai.github.io/LabelConvert/docs"}
 
 setuptools.setup(
     name=MODULE_NAME,
@@ -82,7 +80,8 @@ def get_readme() -> str:
             f"labelImg_to_yolov5={MODULE_NAME}.labelImg_to_yolov5:main",
             f"yolov5_to_coco={MODULE_NAME}.yolov5_to_coco:main",
             f"yolov5_yaml_to_coco={MODULE_NAME}.yolov5_yaml_to_coco:main",
-            f'labelImg_to_publaynet={MODULE_NAME}.labelImg_to_publaynet:main',
+            f"labelImg_to_publaynet={MODULE_NAME}.labelImg_to_publaynet:main",
+            f"labelme_to_coco={MODULE_NAME}.labelme_to_coco:main",
        ],
    },
 )
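
Because demo.py is deleted by this patch, a short usage sketch of the refactored converter may help. It mirrors the removed demo, updated for the renamed sample directory and the new no-argument __call__; the paths and split ratios below are only illustrative, not part of the patch.

    from label_convert.labelme_to_coco import LabelmeToCOCO

    # A directory of labelme *.json files with their images stored alongside them.
    data_dir = "dataset/labelme_dataset"
    out_dir = "output"

    converter = LabelmeToCOCO(
        data_dir=data_dir, out_dir=out_dir, val_ratio=0.1, have_test=True, test_ratio=0.1
    )

    # __call__ now takes no arguments: it splits the images into train/val(/test) sets,
    # copies them into the per-split image folders under out_dir, and writes
    # annotations/instances_train2017.json (and the val/test counterparts) next to them.
    converter()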