From 23b524eb9317b91338212ae93bbb5a223aaafef9 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 26 Jul 2021 14:26:57 +0200
Subject: [PATCH 1/6] Update download script headers

---
 data/scripts/get_coco.sh    | 13 +++++++------
 data/scripts/get_coco128.sh | 16 ++++++++--------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh
index bce692c29ae2..6f401e4fbc49 100755
--- a/data/scripts/get_coco.sh
+++ b/data/scripts/get_coco.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
+# Copyright Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
 # COCO 2017 dataset http://cocodataset.org
-# Download command: bash data/scripts/get_coco.sh
-# Train command: python train.py --data coco.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /coco
-#     /yolov5
+# YOLOv5 🚀 example usage: bash data/scripts/get_coco.sh
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco  ← downloads here

 # Download/unzip labels
 d='../datasets' # unzip directory
diff --git a/data/scripts/get_coco128.sh b/data/scripts/get_coco128.sh
index 395043b5b2dc..fca3ce727b5f 100644
--- a/data/scripts/get_coco128.sh
+++ b/data/scripts/get_coco128.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
-# COCO128 dataset https://www.kaggle.com/ultralytics/coco128
-# Download command: bash data/scripts/get_coco128.sh
-# Train command: python train.py --data coco128.yaml
-# Default dataset location is next to /yolov5:
-#   /parent_folder
-#     /coco128
-#     /yolov5
+# Copyright Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
+# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
+# YOLOv5 🚀 example usage: bash data/scripts/get_coco128.sh
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco128  ← downloads here

 # Download/unzip images and labels
-d='../' # unzip directory
+d='../datasets' # unzip directory
 url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
 f='coco128.zip' # or 'coco2017labels-segments.zip', 68 MB
 echo 'Downloading' $url$f ' ...'

From 4fca6e4c59048bfa982de3a1a508b5f9419bed1f Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 26 Jul 2021 14:31:50 +0200
Subject: [PATCH 2/6] cleanup

---
 data/scripts/download_weights.sh | 9 +++++++--
 data/scripts/get_coco.sh         | 3 +--
 data/scripts/get_coco128.sh      | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/data/scripts/download_weights.sh b/data/scripts/download_weights.sh
index 6a279f1636fc..5d74f0266815 100755
--- a/data/scripts/download_weights.sh
+++ b/data/scripts/download_weights.sh
@@ -1,7 +1,12 @@
 #!/bin/bash
+# Copyright Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
 # Download latest models from https://github.com/ultralytics/yolov5/releases
-# Usage:
-#    $ bash path/to/download_weights.sh
+# YOLOv5 🚀 example usage: bash path/to/download_weights.sh
+# parent
+# └── yolov5
+#     ├── yolov5s.pt  ← downloads here
+#     ├── yolov5m.pt
+#     └── ...
 python - <<EOF

Date: Mon, 26 Jul 2021 14:40:44 +0200
Subject: [PATCH 3/6] bug fix attempt

---
 train.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/train.py b/train.py
index db045c766716..31fb0f972599 100644
--- a/train.py
+++ b/train.py
@@ -49,6 +49,11 @@
 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))

+import yaml.reader
+import re
+
+yaml.reader.Reader.NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]')
+

 def train(hyp,  # path/to/hyp.yaml or hyp dictionary
           opt,
           device,

From 61820c8c20c82647afc76eb685573b44f46cf065 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 26 Jul 2021 14:47:27 +0200
Subject: [PATCH 4/6] bug fix attempt2

---
 train.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 31fb0f972599..fd910a56d667 100644
--- a/train.py
+++ b/train.py
@@ -84,7 +84,10 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     cuda = device.type != 'cpu'
     init_seeds(1 + RANK)
     with open(data) as f:
-        data_dict = yaml.safe_load(f)  # data dict
+        data_dict = yaml.safe_load(f.read().replace(u'\1F680', ''))  # data dict
+        # f.read().replace(u'\x82', '')  # \1F680
+        # f.read().replace(u'\1F680', '')  # \1F680
+
     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
     assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check

From 73adff0680aa00033fa5b7e3bb1958b76f3d6bbf Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 26 Jul 2021 15:03:19 +0200
Subject: [PATCH 5/6] bug fix attempt3

---
 train.py | 11 ++---------
 val.py   |  2 +-
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/train.py b/train.py
index fd910a56d667..0604d18ca247 100644
--- a/train.py
+++ b/train.py
@@ -49,11 +49,6 @@
 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))

-import yaml.reader
-import re
-
-yaml.reader.Reader.NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]')
-

 def train(hyp,  # path/to/hyp.yaml or hyp dictionary
           opt,
           device,
@@ -83,10 +78,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     plots = not evolve  # create plots
     cuda = device.type != 'cpu'
     init_seeds(1 + RANK)
-    with open(data) as f:
-        data_dict = yaml.safe_load(f.read().replace(u'\1F680', ''))  # data dict
-        # f.read().replace(u'\x82', '')  # \1F680
-        # f.read().replace(u'\1F680', '')  # \1F680
+    with open(data, encoding='ascii', errors='ignore') as f:
+        data_dict = yaml.safe_load(f)  # data dict

     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
diff --git a/val.py b/val.py
index f20877e8aa0b..c58bcdb209c2 100644
--- a/val.py
+++ b/val.py
@@ -123,7 +123,7 @@ def run(data,
         model = nn.DataParallel(model)

     # Data
-    with open(data) as f:
+    with open(data, encoding='ascii', errors='ignore') as f:
         data = yaml.safe_load(f)
     check_dataset(data)  # check

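Patch 5 abandons the PyYAML `NON_PRINTABLE` monkey-patch (patch 3) and the manual emoji stripping (patch 4) in favor of opening the YAML files with `encoding='ascii', errors='ignore'`. A minimal sketch of why this sidesteps the failure, with a hypothetical `demo.yaml` standing in for a YOLOv5 dataset YAML: `open()` without an explicit encoding uses the platform default (often cp1252 on Windows), which cannot decode the UTF-8 bytes of the 🚀 emoji, while ASCII decoding with `errors='ignore'` simply drops those bytes before they reach the parser.

```python
# Minimal sketch, not part of the patch series: demo.yaml is a hypothetical
# data file with an emoji comment, like the YOLOv5 dataset YAMLs.
import yaml

with open('demo.yaml', 'w', encoding='utf-8') as f:
    f.write('# YOLOv5 🚀 demo\nnc: 80\nnames: [person]\n')

# open('demo.yaml') with no encoding argument uses the locale default
# (e.g. cp1252 on many Windows setups) and can raise UnicodeDecodeError here.
with open('demo.yaml', encoding='ascii', errors='ignore') as f:
    data_dict = yaml.safe_load(f)  # non-ASCII bytes are dropped; YAML still parses

print(data_dict)  # {'nc': 80, 'names': ['person']}
```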
From bd46d5f32b34c74e3411913d37972cdc27e13c03 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 26 Jul 2021 15:15:58 +0200
Subject: [PATCH 6/6] cleanup

---
 train.py                           |  2 +-
 utils/autoanchor.py                | 10 ++++------
 utils/datasets.py                  |  2 +-
 utils/loggers/wandb/log_dataset.py |  4 ++--
 utils/loggers/wandb/wandb_utils.py |  6 +++---
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/train.py b/train.py
index 0604d18ca247..bd1fa9c74328 100644
--- a/train.py
+++ b/train.py
@@ -79,7 +79,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     cuda = device.type != 'cpu'
     init_seeds(1 + RANK)
     with open(data, encoding='ascii', errors='ignore') as f:
-        data_dict = yaml.safe_load(f)  # data dict
+        data_dict = yaml.safe_load(f)

     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 6abdd2d38832..2571fc99ac89 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -60,11 +60,11 @@ def metric(k):  # compute metric
     print('')  # newline


-def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
+def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
     """ Creates kmeans-evolved anchors from training dataset

         Arguments:
-            path: path to dataset *.yaml, or a loaded dataset
+            dataset: path to data.yaml, or a loaded dataset
             n: number of anchors
             img_size: image size used for training
             thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
@@ -103,13 +103,11 @@ def print_results(k):
             print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n')  # use in *.cfg
         return k

-    if isinstance(path, str):  # *.yaml file
-        with open(path) as f:
+    if isinstance(dataset, str):  # *.yaml file
+        with open(dataset, encoding='ascii', errors='ignore') as f:
             data_dict = yaml.safe_load(f)  # model dict
         from utils.datasets import LoadImagesAndLabels
         dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
-    else:
-        dataset = path  # dataset

     # Get label wh
     shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
diff --git a/utils/datasets.py b/utils/datasets.py
index d3edafa99bd0..5b5ded4bbc41 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -909,7 +909,7 @@ def unzip(path):
             return False, None, path

     zipped, data_dir, yaml_path = unzip(Path(path))
-    with open(check_file(yaml_path)) as f:
+    with open(check_file(yaml_path), encoding='ascii', errors='ignore') as f:
         data = yaml.safe_load(f)  # data dict
     if zipped:
         data['path'] = data_dir  # TODO: should this be dir.resolve()?
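The `kmean_anchors()` change above is more than a rename: with the argument called `dataset`, a string is replaced in place by the loaded `LoadImagesAndLabels` instance, and any other value is assumed to already be a dataset, which is what made the old `else: dataset = path` branch redundant. A hypothetical call after the rename (a sketch only; it assumes the coco128 dataset is available locally):

```python
from utils.autoanchor import kmean_anchors

# Either form works after the rename: a data.yaml path...
anchors = kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0)
# ...or an already-constructed LoadImagesAndLabels instance passed directly.
```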
diff --git a/utils/loggers/wandb/log_dataset.py b/utils/loggers/wandb/log_dataset.py
index 3a9a3d79fe01..b5663c92ee09 100644
--- a/utils/loggers/wandb/log_dataset.py
+++ b/utils/loggers/wandb/log_dataset.py
@@ -8,9 +8,9 @@


 def create_dataset_artifact(opt):
-    with open(opt.data) as f:
+    with open(opt.data, encoding='ascii', errors='ignore') as f:
         data = yaml.safe_load(f)  # data dict
-    logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')
+    logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')  # TODO: return value unused


 if __name__ == '__main__':
diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py
index db2693a9e11c..581041acbdb7 100644
--- a/utils/loggers/wandb/wandb_utils.py
+++ b/utils/loggers/wandb/wandb_utils.py
@@ -62,7 +62,7 @@ def check_wandb_resume(opt):


 def process_wandb_config_ddp_mode(opt):
-    with open(check_file(opt.data)) as f:
+    with open(check_file(opt.data), encoding='ascii', errors='ignore') as f:
         data_dict = yaml.safe_load(f)  # data dict
     train_dir, val_dir = None, None
     if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
@@ -150,7 +150,7 @@ def check_and_upload_dataset(self, opt):
                                         opt.single_cls,
                                         'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
         print("Created dataset config file ", config_path)
-        with open(config_path) as f:
+        with open(config_path, encoding='ascii', errors='ignore') as f:
             wandb_data_dict = yaml.safe_load(f)
         return wandb_data_dict

@@ -226,7 +226,7 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False):
         print("Saving model artifact on epoch ", epoch + 1)

     def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
-        with open(data_file) as f:
+        with open(data_file, encoding='ascii', errors='ignore') as f:
             data = yaml.safe_load(f)  # data dict
         check_dataset(data)
         nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
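Patch 6 applies the same `open(..., encoding='ascii', errors='ignore')` idiom at five call sites. If the pattern spreads further, it could be centralized; a hypothetical helper, not part of this PR, might look like:

```python
import yaml

def yaml_load(path):
    """Load a YAML file, dropping non-ASCII characters (e.g. emoji in comments)."""
    with open(path, encoding='ascii', errors='ignore') as f:
        return yaml.safe_load(f)

# usage: data_dict = yaml_load(opt.data)
```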