Skip to content

Commit

Permalink
Bump version to v2.3.1
Browse files Browse the repository at this point in the history
  • Loading branch information
CVHub520 committed Jan 31, 2024
1 parent cb0c8c7 commit d2974c8
Show file tree
Hide file tree
Showing 25 changed files with 14,648 additions and 14,399 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,12 @@

## 🥳 What's New [⏏️](#📄-table-of-contents)

- Feb. 2024:
- 🤗 Release the latest version [2.3.1](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.1) 🤗
- Jan. 2024:
- 👏👏👏 Combining CLIP and SAM models for enhanced semantic and spatial understanding. An example can be found [here](./anylabeling/configs/auto_labeling/edge_sam_with_chinese_clip.yaml).
- 🔥🔥🔥 Adding support for the [Depth Anything](https://github.com/LiheYoung/Depth-Anything.git) model in the depth estimation task.
- 🤗 Release the latest version [2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0) 🤗
- Release version [2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0).
- Support [YOLOv8-OBB](https://github.com/ultralytics/ultralytics) model.
- Support [RTMDet](https://github.com/open-mmlab/mmyolo/tree/main/configs/rtmdet) and [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmpose) model.
- Release a [chinese license plate](https://github.com/we0091234/Chinese_license_plate_detection_recognition) detection and recognition model based on YOLOv5.
Expand Down
4 changes: 3 additions & 1 deletion README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,13 @@

## 🥳 新功能 [⏏️](#📄-目录)

- 2024年2月:
- 🤗 发布[2.3.1](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.1)最新版本 🤗
- 2024年1月:
- 支持一键截取子图功能。
- 👏👏👏 结合CLIP和SAM模型,实现更强大的语义和空间理解。具体可参考此[示例](./anylabeling/configs/auto_labeling/edge_sam_with_chinese_clip.yaml)
- 🔥🔥🔥 在深度估计任务中增加对[Depth Anything](https://github.com/LiheYoung/Depth-Anything.git)模型的支持。
- 🤗 发布[2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0)最新版本 🤗
- 发布[2.3.0](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.3.0)版本
- 支持 [YOLOv8-OBB](https://github.com/ultralytics/ultralytics) 模型。
- 支持 [RTMDet](https://github.com/open-mmlab/mmyolo/tree/main/configs/rtmdet) 和 [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmpose) 模型。
- 支持基于YOLOv5的[中文车牌](https://github.com/we0091234/Chinese_license_plate_detection_recognition)检测和识别模型。
Expand Down
2 changes: 1 addition & 1 deletion anylabeling/app_info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__appname__ = "X-AnyLabeling"
__appdescription__ = "Advanced Auto Labeling Solution with Added Features"
__version__ = "2.3.0"
__version__ = "2.3.1"
__preferred_device__ = "CPU" # GPU or CPU
27,777 changes: 13,927 additions & 13,850 deletions anylabeling/resources/resources.py

Large diffs are not rendered by default.

511 changes: 281 additions & 230 deletions anylabeling/resources/translations/en_US.ts

Large diffs are not rendered by default.

Binary file modified anylabeling/resources/translations/zh_CN.qm
Binary file not shown.
517 changes: 284 additions & 233 deletions anylabeling/resources/translations/zh_CN.ts

Large diffs are not rendered by default.

114 changes: 69 additions & 45 deletions anylabeling/services/auto_labeling/__base__/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,40 @@
_MODEL_INFO = {
"ViT-B-16": {
"struct": "ViT-B-16@RoBERTa-wwm-ext-base-chinese",
"input_resolution": 224
"input_resolution": 224,
},
"ViT-L-14": {
"struct": "ViT-L-14@RoBERTa-wwm-ext-base-chinese",
"input_resolution": 224
"input_resolution": 224,
},
"ViT-L-14-336": {
"struct": "ViT-L-14-336@RoBERTa-wwm-ext-base-chinese",
"input_resolution": 336
"input_resolution": 336,
},
"ViT-H-14": {
"struct": "ViT-H-14@RoBERTa-wwm-ext-large-chinese",
"input_resolution": 224
},
"RN50": {
"struct": "RN50@RBT3-chinese",
"input_resolution": 224
"input_resolution": 224,
},
"RN50": {"struct": "RN50@RBT3-chinese", "input_resolution": 224},
}


class ChineseClipONNX:
"""Ref: https://github.com/OFA-Sys/Chinese-CLIP"""

def __init__(
self,
txt_model_path: str,
img_model_path: str,
model_arch: str,
device: str = "cpu",
context_length: int = 52
context_length: int = 52,
) -> None:

# Load models
self.txt_net = OnnxBaseModel(txt_model_path, device_type=device)
self.img_net = OnnxBaseModel(img_model_path, device_type=device)
# Image settings
self.image_size = _MODEL_INFO[model_arch]['input_resolution']
self.image_size = _MODEL_INFO[model_arch]["input_resolution"]
# Text settings
self._tokenizer = FullTokenizer()
self.context_length = context_length
Expand Down Expand Up @@ -94,12 +91,19 @@ def normalize(data, mean, std):
return arrays

def image_preprocess(
self, image, image_size=224, bgr2rgb=False,
mean_value=[0.48145466, 0.4578275, 0.40821073],
std_value=[0.26862954, 0.26130258, 0.27577711],
):
self,
image,
image_size=224,
bgr2rgb=False,
mean_value=[0.48145466, 0.4578275, 0.40821073],
std_value=[0.26862954, 0.26130258, 0.27577711],
):
# Resize using OpenCV
image_size = (image_size, image_size) if isinstance(image_size, int) else image_size
image_size = (
(image_size, image_size)
if isinstance(image_size, int)
else image_size
)
image = cv2.resize(image, image_size)
# Convert to RGB if needed
if bgr2rgb:
Expand Down Expand Up @@ -131,16 +135,20 @@ def tokenize(self, texts, context_length=52):

all_tokens = []
for text in texts:
tokenized_text = [self._tokenizer.vocab['[CLS]']] \
+ self._tokenizer.convert_tokens_to_ids(self._tokenizer.tokenize(text))[:context_length - 2] \
+ [self._tokenizer.vocab['[SEP]']]
tokenized_text = (
[self._tokenizer.vocab["[CLS]"]]
+ self._tokenizer.convert_tokens_to_ids(
self._tokenizer.tokenize(text)
)[: context_length - 2]
+ [self._tokenizer.vocab["[SEP]"]]
)
all_tokens.append(tokenized_text)

result = np.zeros((len(all_tokens), context_length), dtype=np.int64)

for i, tokens in enumerate(all_tokens):
assert len(tokens) <= context_length
result[i, :len(tokens)] = np.array(tokens)
result[i, : len(tokens)] = np.array(tokens)

return result

Expand All @@ -150,6 +158,7 @@ def default_vocab():
current_dir = os.path.dirname(__file__)
return os.path.join(current_dir, "..", "configs", "clip_vocab.txt")


def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
Expand All @@ -162,6 +171,7 @@ def convert_to_unicode(text):
else:
raise ValueError("Not running on Python2 or Python 3?")


def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
Expand All @@ -176,6 +186,7 @@ def load_vocab(vocab_file):
index += 1
return vocab


def whitespace_tokenize(text):
"""Runs basic whitespace cleaning and splitting on a piece of text."""
text = text.strip()
Expand All @@ -184,21 +195,27 @@ def whitespace_tokenize(text):
tokens = text.split()
return tokens


def _is_punctuation(char):
"""Checks whether `chars` is a punctuation character."""
cp = ord(char)
# We treat all non-letter/number ASCII as punctuation.
# Characters such as "^", "$", and "`" are not in the Unicode
# Punctuation class but we treat them as punctuation anyways, for
# consistency.
if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
(cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
if (
(cp >= 33 and cp <= 47)
or (cp >= 58 and cp <= 64)
or (cp >= 91 and cp <= 96)
or (cp >= 123 and cp <= 126)
):
return True
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
return False


def _is_control(char):
"""Checks whether `char` is a control character."""
# These are technically control characters but we count them as whitespace
Expand All @@ -210,6 +227,7 @@ def _is_control(char):
return True
return False


def _is_whitespace(char):
"""Checks whether `char` is a whitespace character."""
# \t, \n, and \r are technically control characters but we treat them
Expand All @@ -221,16 +239,19 @@ def _is_whitespace(char):
return True
return False


def convert_by_vocab(vocab, items):
    """Look up every entry of `items` in `vocab`.

    Args:
        vocab: Mapping indexed with each item (tokens -> ids or ids -> tokens).
        items: Iterable of keys to look up, in order.

    Returns:
        A new list holding `vocab[item]` for each item, in input order.
        A failed lookup propagates whatever `vocab[...]` raises.
    """
    return [vocab[entry] for entry in items]


def convert_tokens_to_ids(vocab, tokens):
    """Translate `tokens` into ids by looking each one up in `vocab`.

    Thin wrapper that delegates to `convert_by_vocab`.
    """
    token_ids = convert_by_vocab(vocab, tokens)
    return token_ids


def convert_ids_to_tokens(inv_vocab, ids):
    """Translate `ids` back into tokens by looking each one up in `inv_vocab`.

    Thin wrapper that delegates to `convert_by_vocab`.
    """
    tokens = convert_by_vocab(inv_vocab, ids)
    return tokens

Expand Down Expand Up @@ -324,14 +345,16 @@ def _is_chinese_char(self, cp):
# as is Japanese Hiragana and Katakana. Those alphabets are used to write
# space-separated words, so they are not treated specially and handled
# like all of the other languages.
if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
(cp >= 0x3400 and cp <= 0x4DBF) or #
(cp >= 0x20000 and cp <= 0x2A6DF) or #
(cp >= 0x2A700 and cp <= 0x2B73F) or #
(cp >= 0x2B740 and cp <= 0x2B81F) or #
(cp >= 0x2B820 and cp <= 0x2CEAF) or
(cp >= 0xF900 and cp <= 0xFAFF) or #
(cp >= 0x2F800 and cp <= 0x2FA1F)): #
if (
(cp >= 0x4E00 and cp <= 0x9FFF)
or (cp >= 0x3400 and cp <= 0x4DBF) #
or (cp >= 0x20000 and cp <= 0x2A6DF) #
or (cp >= 0x2A700 and cp <= 0x2B73F) #
or (cp >= 0x2B740 and cp <= 0x2B81F) #
or (cp >= 0x2B820 and cp <= 0x2CEAF) #
or (cp >= 0xF900 and cp <= 0xFAFF)
or (cp >= 0x2F800 and cp <= 0x2FA1F) #
): #
return True

return False
Expand All @@ -341,14 +364,15 @@ def _clean_text(self, text):
output = []
for char in text:
cp = ord(char)
if cp == 0 or cp == 0xfffd or _is_control(char):
if cp == 0 or cp == 0xFFFD or _is_control(char):
continue
if _is_whitespace(char):
output.append(" ")
else:
output.append(char)
return "".join(output)


class WordpieceTokenizer(object):
"""Runs WordPiece tokenization."""

Expand Down Expand Up @@ -410,6 +434,7 @@ def tokenize(self, text):
output_tokens.extend(sub_tokens)
return output_tokens


class FullTokenizer(object):
"""Runs end-to-end tokenization."""

Expand All @@ -435,27 +460,27 @@ def convert_ids_to_tokens(self, ids):

@staticmethod
def convert_tokens_to_string(tokens, clean_up_tokenization_spaces=True):
""" Converts a sequence of tokens (string) in a single string. """
"""Converts a sequence of tokens (string) in a single string."""

def clean_up_tokenization(out_string):
""" Clean up a list of simple English tokenization artifacts
"""Clean up a list of simple English tokenization artifacts
like spaces before punctuation and abbreviated forms.
"""
out_string = (
out_string.replace(" .", ".")
.replace(" ?", "?")
.replace(" !", "!")
.replace(" ,", ",")
.replace(" ' ", "'")
.replace(" n't", "n't")
.replace(" 'm", "'m")
.replace(" 's", "'s")
.replace(" 've", "'ve")
.replace(" 're", "'re")
.replace(" ?", "?")
.replace(" !", "!")
.replace(" ,", ",")
.replace(" ' ", "'")
.replace(" n't", "n't")
.replace(" 'm", "'m")
.replace(" 's", "'s")
.replace(" 've", "'ve")
.replace(" 're", "'re")
)
return out_string

text = ' '.join(tokens).replace(' ##', '').strip()
text = " ".join(tokens).replace(" ##", "").strip()
if clean_up_tokenization_spaces:
clean_text = clean_up_tokenization(text)
return clean_text
Expand All @@ -467,7 +492,6 @@ def vocab_size(self):


if __name__ == "__main__":

ROOT_PATH = ""
txt_model_path = f"{ROOT_PATH}/deploy/vit-b-16.txt.fp16.onnx"
img_model_path = f"{ROOT_PATH}/deploy/vit-b-16.img.fp16.onnx"
Expand All @@ -479,4 +503,4 @@ def vocab_size(self):
input_image = cv2.imread(image_path)
input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
prob = clip(input_image, input_text)
print(f"prob: {prob}")
print(f"prob: {prob}")
2 changes: 2 additions & 0 deletions anylabeling/services/auto_labeling/__base__/sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Tuple
from copy import deepcopy


class SegmentAnythingONNX:
"""Segmentation model using SegmentAnything"""

Expand Down Expand Up @@ -232,6 +233,7 @@ def get_approx_contours(masks):

return approx_contours


class EdgeSAMONNX(object):
def __init__(
self, encoder_model_path, decoder_model_path, target_length
Expand Down
8 changes: 6 additions & 2 deletions anylabeling/services/auto_labeling/__base__/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,17 @@ def process_mask(self, protos, masks_in, bboxes, shape, upsample=False):
if masks.shape[0] == 1:
masks_np = np.squeeze(masks, axis=0)
masks_resized = cv2.resize(
masks_np, (shape[1], shape[0]), interpolation=cv2.INTER_LINEAR
masks_np,
(shape[1], shape[0]),
interpolation=cv2.INTER_LINEAR,
)
masks = np.expand_dims(masks_resized, axis=0)
else:
masks_np = np.transpose(masks, (1, 2, 0))
masks_resized = cv2.resize(
masks_np, (shape[1], shape[0]), interpolation=cv2.INTER_LINEAR
masks_np,
(shape[1], shape[0]),
interpolation=cv2.INTER_LINEAR,
)
masks = np.transpose(masks_resized, (2, 0, 1))
masks[masks > 0.5] = 1
Expand Down
Loading

0 comments on commit d2974c8

Please sign in to comment.