Bump version to v2.3.1

CVHub520 · Jan 31, 2024 · d2974c8 · d2974c8
1 parent cb0c8c7
commit d2974c8
Show file tree

Hide file tree

Showing 25 changed files with 14,648 additions and 14,399 deletions.
diff --git a/README.md b/README.md
@@ -71,10 +71,12 @@
 
 ## 🥳 What's New [⏏️](#📄-table-of-contents)
 
+- Feb. 2024:
+  - 🤗 Release the latest version [2.3.1](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.1) 🤗
 - Jan. 2024:
   - 👏👏👏 Combining CLIP and SAM models for enhanced semantic and spatial understanding. An example can be found [here](./anylabeling/configs/auto_labeling/edge_sam_with_chinese_clip.yaml).
   - 🔥🔥🔥 Adding support for the [Depth Anything](https://github.com/LiheYoung/Depth-Anything.git) model in the depth estimation task.
-  - 🤗 Release the latest version [2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0) 🤗
+  - Release version [2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0).
   - Support [YOLOv8-OBB](https://github.com/ultralytics/ultralytics) model.
   - Support [RTMDet](https://github.com/open-mmlab/mmyolo/tree/main/configs/rtmdet) and [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmpose) model.
   - Release a [chinese license plate](https://github.com/we0091234/Chinese_license_plate_detection_recognition) detection and recognition model based on YOLOv5.

diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -69,11 +69,13 @@
 
 ## 🥳 新功能 [⏏️](#📄-目录)
 
+- 2024年2月：
+  - 🤗 发布[2.3.1](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.1)最新版本 🤗
 - 2024年1月：
   - 支持一键截取子图功能。
   - 👏👏👏 结合CLIP和SAM模型，实现更强大的语义和空间理解。具体可参考此[示例](./anylabeling/configs/auto_labeling/edge_sam_with_chinese_clip.yaml)。
   - 🔥🔥🔥 在深度估计任务中增加对[Depth Anything](https://github.com/LiheYoung/Depth-Anything.git)模型的支持。
-  - 🤗 发布[2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0)最新版本 🤗
+  - 发布[2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0)版本
   - 支持 [YOLOv8-OBB](https://github.com/ultralytics/ultralytics) 模型。
   - 支持 [RTMDet](https://github.com/open-mmlab/mmyolo/tree/main/configs/rtmdet) 和 [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmpose) 模型。
   - 支持基于YOLOv5的[中文车牌](https://github.com/we0091234/Chinese_license_plate_detection_recognition)检测和识别模型。

diff --git a/anylabeling/app_info.py b/anylabeling/app_info.py
@@ -1,4 +1,4 @@
 __appname__ = "X-AnyLabeling"
 __appdescription__ = "Advanced Auto Labeling Solution with Added Features"
-__version__ = "2.3.0"
+__version__ = "2.3.1"
 __preferred_device__ = "CPU"  # GPU or CPU
diff --git a/anylabeling/resources/resources.py b/anylabeling/resources/resources.py
diff --git a/anylabeling/resources/translations/en_US.ts b/anylabeling/resources/translations/en_US.ts
diff --git a/anylabeling/resources/translations/zh_CN.qm b/anylabeling/resources/translations/zh_CN.qm
diff --git a/anylabeling/resources/translations/zh_CN.ts b/anylabeling/resources/translations/zh_CN.ts
diff --git a/anylabeling/services/auto_labeling/__base__/clip.py b/anylabeling/services/auto_labeling/__base__/clip.py
@@ -13,43 +13,40 @@
 _MODEL_INFO = {
     "ViT-B-16": {
         "struct": "ViT-B-16@RoBERTa-wwm-ext-base-chinese",
-        "input_resolution": 224
+        "input_resolution": 224,
     },
     "ViT-L-14": {
         "struct": "ViT-L-14@RoBERTa-wwm-ext-base-chinese",
-        "input_resolution": 224
+        "input_resolution": 224,
     },
     "ViT-L-14-336": {
         "struct": "ViT-L-14-336@RoBERTa-wwm-ext-base-chinese",
-        "input_resolution": 336
+        "input_resolution": 336,
     },
     "ViT-H-14": {
         "struct": "ViT-H-14@RoBERTa-wwm-ext-large-chinese",
-        "input_resolution": 224
-    },
-    "RN50": {
-        "struct": "RN50@RBT3-chinese",
-        "input_resolution": 224
+        "input_resolution": 224,
     },
+    "RN50": {"struct": "RN50@RBT3-chinese", "input_resolution": 224},
 }
 
 
 class ChineseClipONNX:
     """Ref: https://github.com/OFA-Sys/Chinese-CLIP"""
+
     def __init__(
         self,
         txt_model_path: str,
         img_model_path: str,
         model_arch: str,
         device: str = "cpu",
-        context_length: int = 52
+        context_length: int = 52,
     ) -> None:
-
         # Load models
         self.txt_net = OnnxBaseModel(txt_model_path, device_type=device)
         self.img_net = OnnxBaseModel(img_model_path, device_type=device)
         # Image settings
-        self.image_size = _MODEL_INFO[model_arch]['input_resolution']
+        self.image_size = _MODEL_INFO[model_arch]["input_resolution"]
         # Text settings
         self._tokenizer = FullTokenizer()
         self.context_length = context_length
@@ -94,12 +91,19 @@ def normalize(data, mean, std):
         return arrays
 
     def image_preprocess(
-            self, image, image_size=224, bgr2rgb=False,
-            mean_value=[0.48145466, 0.4578275, 0.40821073],
-            std_value=[0.26862954, 0.26130258, 0.27577711],
-        ):
+        self,
+        image,
+        image_size=224,
+        bgr2rgb=False,
+        mean_value=[0.48145466, 0.4578275, 0.40821073],
+        std_value=[0.26862954, 0.26130258, 0.27577711],
+    ):
         # Resize using OpenCV
-        image_size = (image_size, image_size) if isinstance(image_size, int) else image_size
+        image_size = (
+            (image_size, image_size)
+            if isinstance(image_size, int)
+            else image_size
+        )
         image = cv2.resize(image, image_size)
         # Convert to RGB if needed
         if bgr2rgb:
@@ -131,16 +135,20 @@ def tokenize(self, texts, context_length=52):
 
         all_tokens = []
         for text in texts:
-            tokenized_text = [self._tokenizer.vocab['[CLS]']] \
-                + self._tokenizer.convert_tokens_to_ids(self._tokenizer.tokenize(text))[:context_length - 2] \
-                + [self._tokenizer.vocab['[SEP]']]
+            tokenized_text = (
+                [self._tokenizer.vocab["[CLS]"]]
+                + self._tokenizer.convert_tokens_to_ids(
+                    self._tokenizer.tokenize(text)
+                )[: context_length - 2]
+                + [self._tokenizer.vocab["[SEP]"]]
+            )
             all_tokens.append(tokenized_text)
 
         result = np.zeros((len(all_tokens), context_length), dtype=np.int64)
 
         for i, tokens in enumerate(all_tokens):
             assert len(tokens) <= context_length
-            result[i, :len(tokens)] = np.array(tokens)
+            result[i, : len(tokens)] = np.array(tokens)
 
         return result
 
@@ -150,6 +158,7 @@ def default_vocab():
     current_dir = os.path.dirname(__file__)
     return os.path.join(current_dir, "..", "configs", "clip_vocab.txt")
 
+
 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
     if six.PY3:
@@ -162,6 +171,7 @@ def convert_to_unicode(text):
     else:
         raise ValueError("Not running on Python2 or Python 3?")
 
+
 def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
@@ -176,6 +186,7 @@ def load_vocab(vocab_file):
             index += 1
     return vocab
 
+
 def whitespace_tokenize(text):
     """Runs basic whitespace cleaning and splitting on a piece of text."""
     text = text.strip()
@@ -184,21 +195,27 @@ def whitespace_tokenize(text):
     tokens = text.split()
     return tokens
 
+
 def _is_punctuation(char):
     """Checks whether `chars` is a punctuation character."""
     cp = ord(char)
     # We treat all non-letter/number ASCII as punctuation.
     # Characters such as "^", "$", and "`" are not in the Unicode
     # Punctuation class but we treat them as punctuation anyways, for
     # consistency.
-    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
-            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+    if (
+        (cp >= 33 and cp <= 47)
+        or (cp >= 58 and cp <= 64)
+        or (cp >= 91 and cp <= 96)
+        or (cp >= 123 and cp <= 126)
+    ):
         return True
     cat = unicodedata.category(char)
     if cat.startswith("P"):
         return True
     return False
 
+
 def _is_control(char):
     """Checks whether `chars` is a control character."""
     # These are technically control characters but we count them as whitespace
@@ -210,6 +227,7 @@ def _is_control(char):
         return True
     return False
 
+
 def _is_whitespace(char):
     """Checks whether `chars` is a whitespace character."""
     # \t, \n, and \r are technically control characters but we treat them
@@ -221,16 +239,19 @@ def _is_whitespace(char):
         return True
     return False
 
+
 def convert_by_vocab(vocab, items):
     """Look up each element of `items` in `vocab` and return the results in order."""
     # The same helper serves both directions (token -> id and id -> token);
     # only the mapping passed in differs.
     return [vocab[entry] for entry in items]
 
+
 def convert_tokens_to_ids(vocab, tokens):
     """Translate `tokens` into ids by looking each one up in `vocab`."""
     token_ids = convert_by_vocab(vocab, tokens)
     return token_ids
 
+
 def convert_ids_to_tokens(inv_vocab, ids):
     """Translate `ids` back into tokens by looking each one up in `inv_vocab`."""
     tokens = convert_by_vocab(inv_vocab, ids)
     return tokens
 
@@ -324,14 +345,16 @@ def _is_chinese_char(self, cp):
         # as is Japanese Hiragana and Katakana. Those alphabets are used to write
         # space-separated words, so they are not treated specially and handled
         # like all of the other languages.
-        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
-            (cp >= 0x3400 and cp <= 0x4DBF) or  #
-            (cp >= 0x20000 and cp <= 0x2A6DF) or  #
-            (cp >= 0x2A700 and cp <= 0x2B73F) or  #
-            (cp >= 0x2B740 and cp <= 0x2B81F) or  #
-            (cp >= 0x2B820 and cp <= 0x2CEAF) or
-            (cp >= 0xF900 and cp <= 0xFAFF) or  #
-                (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
+        if (
+            (cp >= 0x4E00 and cp <= 0x9FFF)
+            or (cp >= 0x3400 and cp <= 0x4DBF)  #
+            or (cp >= 0x20000 and cp <= 0x2A6DF)  #
+            or (cp >= 0x2A700 and cp <= 0x2B73F)  #
+            or (cp >= 0x2B740 and cp <= 0x2B81F)  #
+            or (cp >= 0x2B820 and cp <= 0x2CEAF)  #
+            or (cp >= 0xF900 and cp <= 0xFAFF)
+            or (cp >= 0x2F800 and cp <= 0x2FA1F)  #
+        ):  #
             return True
 
         return False
@@ -341,14 +364,15 @@ def _clean_text(self, text):
         output = []
         for char in text:
             cp = ord(char)
-            if cp == 0 or cp == 0xfffd or _is_control(char):
+            if cp == 0 or cp == 0xFFFD or _is_control(char):
                 continue
             if _is_whitespace(char):
                 output.append(" ")
             else:
                 output.append(char)
         return "".join(output)
 
+
 class WordpieceTokenizer(object):
     """Runs WordPiece tokenziation."""
 
@@ -410,6 +434,7 @@ def tokenize(self, text):
                 output_tokens.extend(sub_tokens)
         return output_tokens
 
+
 class FullTokenizer(object):
     """Runs end-to-end tokenziation."""
 
@@ -435,27 +460,27 @@ def convert_ids_to_tokens(self, ids):
 
     @staticmethod
     def convert_tokens_to_string(tokens, clean_up_tokenization_spaces=True):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
 
         def clean_up_tokenization(out_string):
-            """ Clean up a list of simple English tokenization artifacts
+            """Clean up a list of simple English tokenization artifacts
             like spaces before punctuations and abbreviated forms.
             """
             out_string = (
                 out_string.replace(" .", ".")
-                    .replace(" ?", "?")
-                    .replace(" !", "!")
-                    .replace(" ,", ",")
-                    .replace(" ' ", "'")
-                    .replace(" n't", "n't")
-                    .replace(" 'm", "'m")
-                    .replace(" 's", "'s")
-                    .replace(" 've", "'ve")
-                    .replace(" 're", "'re")
+                .replace(" ?", "?")
+                .replace(" !", "!")
+                .replace(" ,", ",")
+                .replace(" ' ", "'")
+                .replace(" n't", "n't")
+                .replace(" 'm", "'m")
+                .replace(" 's", "'s")
+                .replace(" 've", "'ve")
+                .replace(" 're", "'re")
             )
             return out_string
 
-        text = ' '.join(tokens).replace(' ##', '').strip()
+        text = " ".join(tokens).replace(" ##", "").strip()
         if clean_up_tokenization_spaces:
             clean_text = clean_up_tokenization(text)
             return clean_text
@@ -467,7 +492,6 @@ def vocab_size(self):
 
 
 if __name__ == "__main__":
-
     ROOT_PATH = ""
     txt_model_path = f"{ROOT_PATH}/deploy/vit-b-16.txt.fp16.onnx"
     img_model_path = f"{ROOT_PATH}/deploy/vit-b-16.img.fp16.onnx"
@@ -479,4 +503,4 @@ def vocab_size(self):
     input_image = cv2.imread(image_path)
     input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
     prob = clip(input_image, input_text)
-    print(f"prob: {prob}")
+    print(f"prob: {prob}")
diff --git a/anylabeling/services/auto_labeling/__base__/sam.py b/anylabeling/services/auto_labeling/__base__/sam.py
@@ -5,6 +5,7 @@
 from typing import Tuple
 from copy import deepcopy
 
+
 class SegmentAnythingONNX:
     """Segmentation model using SegmentAnything"""
 
@@ -232,6 +233,7 @@ def get_approx_contours(masks):
 
         return approx_contours
 
+
 class EdgeSAMONNX(object):
     def __init__(
         self, encoder_model_path, decoder_model_path, target_length

diff --git a/anylabeling/services/auto_labeling/__base__/yolo.py b/anylabeling/services/auto_labeling/__base__/yolo.py
@@ -423,13 +423,17 @@ def process_mask(self, protos, masks_in, bboxes, shape, upsample=False):
             if masks.shape[0] == 1:
                 masks_np = np.squeeze(masks, axis=0)
                 masks_resized = cv2.resize(
-                    masks_np, (shape[1], shape[0]), interpolation=cv2.INTER_LINEAR
+                    masks_np,
+                    (shape[1], shape[0]),
+                    interpolation=cv2.INTER_LINEAR,
                 )
                 masks = np.expand_dims(masks_resized, axis=0)
             else:
                 masks_np = np.transpose(masks, (1, 2, 0))
                 masks_resized = cv2.resize(
-                    masks_np, (shape[1], shape[0]), interpolation=cv2.INTER_LINEAR
+                    masks_np,
+                    (shape[1], shape[0]),
+                    interpolation=cv2.INTER_LINEAR,
                 )
                 masks = np.transpose(masks_resized, (2, 0, 1))
         masks[masks > 0.5] = 1