From f31c6bab113bdc39a629bbae6e9897de94a346d5 Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Thu, 24 Sep 2020 17:53:59 +0800 Subject: [PATCH 01/20] add RTMP stream function with FFMPEG --- Makefile | 13 +- src/detector.c | 23 +- src/stream.cpp | 580 +++++++++++++++++++++++++++++++++++++++++++++++ src/stream.h | 18 ++ src/streamer.cpp | 262 +++++++++++++++++++++ src/streamer.hpp | 199 ++++++++++++++++ 6 files changed, 1085 insertions(+), 10 deletions(-) create mode 100644 src/stream.cpp create mode 100644 src/stream.h create mode 100644 src/streamer.cpp create mode 100644 src/streamer.hpp diff --git a/Makefile b/Makefile index e6a4ad73884..102ea08d6c8 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ APPNAMESO=uselib endif ifeq ($(USE_CPP), 1) -CC=g++ +CC=g++ -std=c++11 else CC=gcc endif @@ -68,9 +68,9 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS= -lm -pthread -COMMON= -Iinclude/ -I3rdparty/stb/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC +LDFLAGS= -lm -pthread -L/usr/local/lib -L/usr/local/Cellar/ffmpeg/4.1.3/lib -lavformat -lavcodec -lavutil -lswscale +COMMON= -Iinclude/ -I3rdparty/stb/include -I/usr/local/include -I/usr/local/Cellar/ffmpeg/4.1.3/include +CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas #-fPIC -lavformat -lavcodec -lavutil -lswscale ifeq ($(DEBUG), 1) #OPTS= -O0 -g @@ -143,7 +143,7 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=stream.o streamer.o image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o @@ -161,7 +161,8 @@ $(LIBNAMESO): $(OBJDIR) $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp $(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) 
src/yolo_v2_class.cpp -o $@ $(LDFLAGS) $(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp - $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) + #$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) + $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ $(LIBNAMESO) endif $(EXEC): $(OBJS) diff --git a/src/detector.c b/src/detector.c index 52511fb0dfb..4f05e0077ee 100644 --- a/src/detector.c +++ b/src/detector.c @@ -8,6 +8,7 @@ #include "box.h" #include "demo.h" #include "option_list.h" +#include "stream.h" #ifndef __COMPAR_FN_T #define __COMPAR_FN_T @@ -157,7 +158,6 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i args.mosaic_bound = net.mosaic_bound; args.contrastive = net.contrastive; args.contrastive_jit_flip = net.contrastive_jit_flip; - args.contrastive_color = net.contrastive_color; if (dont_show && show_imgs) show_imgs = 2; args.show_imgs = show_imgs; @@ -1965,6 +1965,15 @@ void run_detector(int argc, char **argv) int ext_output = find_arg(argc, argv, "-ext_output"); int save_labels = find_arg(argc, argv, "-save_labels"); char* chart_path = find_char_arg(argc, argv, "-chart", 0); + + int stream_bitrate = find_int_arg(argc, argv, "-stream_bitrate", 0); + int stream_frame_width = find_int_arg(argc, argv, "-stream_width", 0); + int stream_frame_height = find_int_arg(argc, argv, "-stream_height", 0); + int stream_gop_size = find_int_arg(argc, argv, "-stream_gop", 0); + int stream_fps = find_int_arg(argc, argv, "-stream_fps", 0); + char *stream_addr = find_char_arg(argc, argv, "-stream_address", 0); + char *stream_profile = find_char_arg(argc, argv, "-stream_profile", "high444"); + if (argc < 4) { fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -2012,7 +2021,7 @@ void run_detector(int argc, char **argv) int it_num = 100; draw_object(datacfg, cfg, weights, filename, thresh, dont_show, it_num, letter_box, benchmark_layers); } - else if (0 == strcmp(argv[2], "demo")) { + else if (0 == strcmp(argv[2], "demo") || 0 == strcmp(argv[2], "stream")) { list *options = read_data_cfg(datacfg); int classes = option_find_int(options, "classes", 20); char *name_list = option_find_str(options, "names", "data/names.list"); @@ -2020,9 +2029,15 @@ void run_detector(int argc, char **argv) if (filename) if (strlen(filename) > 0) if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; - demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, - mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); + if (0 == strcmp(argv[2], "demo")){ + demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, + mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); + }else if (0 == strcmp(argv[2], "stream")){ + stream(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, + mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers, + stream_bitrate, stream_addr, stream_frame_width, stream_frame_height, 
stream_profile, stream_gop_size, stream_fps); + } free_list_contents_kvp(options); free_list(options); } diff --git a/src/stream.cpp b/src/stream.cpp new file mode 100644 index 00000000000..0a60b2e61f1 --- /dev/null +++ b/src/stream.cpp @@ -0,0 +1,580 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +//#include "demo.h" +#include "darknet.h" +#ifdef WIN32 +#include +#include "gettimeofday.h" +#else +#include +#endif + +#ifdef OPENCV + +#include "http_stream.h" +#include "streamer.hpp" +#include "stream.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace streamer; +using time_point = std::chrono::high_resolution_clock::time_point; +using high_resolution_clock = std::chrono::high_resolution_clock; +using std::cerr; +using std::endl; + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static int nboxes = 0; +static detection *dets = NULL; + +static network net; +static image in_s ; +static image det_s; + +static cap_cv *cap; +static float fps = 0; +static float demo_thresh = 0; +static int demo_ext_output = 0; +static long long int frame_id = 0; +static int demo_json_port = -1; + + +static int avg_frames; +static int demo_index = 0; +static mat_cv** cv_images; + +mat_cv* in_img; +mat_cv* det_img; +mat_cv* show_img; + +static volatile int flag_exit; +static int letter_box = 0; + +static const int thread_wait_ms = 1; +static volatile int run_fetch_in_thread = 0; +static volatile int run_detect_in_thread = 0; + +class MovingAverage +{ + int size; + int pos; + bool crossed; + std::vector v; + +public: + explicit MovingAverage(int sz) + { + size = sz; + v.resize(size); + pos = 0; + crossed = false; + } + + void add_value(double value) + { + v[pos] = value; + pos++; + if(pos == size) { + pos = 0; + crossed = true; + } + } + + double get_average() + { + double avg = 0.0; + int last = crossed ? size : pos; + int k=0; + for(k=0;k min_plus_margin) { + size_t excess = streamed_frames - min_plus_margin; + double dexcess = excess; + + //add a delay ~ excess*processing_time +//#define SHOW_DELAY +#ifdef SHOW_DELAY + double delay = dexcess*avg_frame_time*1000000.0; + printf("frame %07lu adding delay %.4f\n", streamed_frames, delay); + printf("avg fps = %.2f\n", streamed_frames/elapsed); +#endif + usleep(dexcess*avg_frame_time*1000000.0); + } +} + +void process_frame(mat_cv *mat_ptr, cv::Mat &out) +{ + try{ + if (mat_ptr == NULL) return; + cv::Mat &mat = *(cv::Mat *)mat_ptr; + mat.copyTo(out); + }catch (...) 
{ + cerr << "OpenCV exception: process_frame \n"; + } +} + + +void stream_frame(Streamer &streamer, const cv::Mat &image) +{ + streamer.stream_frame(image.data); +} + + +void stream_frame(Streamer &streamer, const cv::Mat &image, int64_t frame_duration) +{ + streamer.stream_frame(image.data, frame_duration); +} + +void *fetch_in_thread(void *ptr) +{ + while (!custom_atomic_load_int(&flag_exit)) { + while (!custom_atomic_load_int(&run_fetch_in_thread)) { + if (custom_atomic_load_int(&flag_exit)) return 0; + this_thread_yield(); + } + int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream + if (letter_box) + in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); + else + in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); + if (!in_s.data) { + printf("Stream closed.\n"); + custom_atomic_store_int(&flag_exit, 1); + custom_atomic_store_int(&run_fetch_in_thread, 0); + //exit(EXIT_FAILURE); + return 0; + } + //in_s = resize_image(in, net.w, net.h); + + custom_atomic_store_int(&run_fetch_in_thread, 0); + } + return 0; +} + +void *fetch_in_thread_sync(void *ptr) +{ + custom_atomic_store_int(&run_fetch_in_thread, 1); + while (custom_atomic_load_int(&run_fetch_in_thread)) this_thread_sleep_for(thread_wait_ms); + return 0; +} + +void *detect_in_thread(void *ptr) +{ + while (!custom_atomic_load_int(&flag_exit)) { + while (!custom_atomic_load_int(&run_detect_in_thread)) { + if (custom_atomic_load_int(&flag_exit)) return 0; + this_thread_yield(); + } + + layer l = net.layers[net.n - 1]; + float *X = det_s.data; + float *prediction = network_predict(net, X); + + cv_images[demo_index] = det_img; + det_img = cv_images[(demo_index + avg_frames / 2 + 1) % avg_frames]; + demo_index = (demo_index + 1) % avg_frames; + + if (letter_box) + dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box + else + dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized + + custom_atomic_store_int(&run_detect_in_thread, 0); + } + + return 0; +} + +void *detect_in_thread_sync(void *ptr) +{ + custom_atomic_store_int(&run_detect_in_thread, 1); + while (custom_atomic_load_int(&run_detect_in_thread)) this_thread_sleep_for(thread_wait_ms); + return 0; +} + +double get_wall_time() +{ + struct timeval walltime; + if (gettimeofday(&walltime, NULL)) { + return 0; + } + return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001; +} +void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, + int benchmark, int benchmark_layers, + int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps) +{ + if (avgframes < 1) avgframes = 1; + avg_frames = avgframes; + letter_box = letter_box_in; + in_img = det_img = show_img = NULL; + //skip = frame_skip; + image **alphabet = load_alphabet(); + int delay = frame_skip; + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_ext_output = ext_output; + demo_json_port = json_port; + printf("Demo\n"); + net = 
parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 + if(weightfile){ + load_weights(&net, weightfile); + } + net.benchmark_layers = benchmark_layers; + fuse_conv_batchnorm(net); + calculate_binary_weights(net); + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = get_capture_video_stream(filename); + }else{ + printf("Webcam index: %d\n", cam_index); + cap = get_capture_webcam(cam_index); + } + + if (!cap) { +#ifdef WIN32 + printf("Check that you have copied file opencv_ffmpeg340_64.dll to the same directory where is darknet.exe \n"); +#endif + error("Couldn't connect to webcam.\n"); + } + + layer l = net.layers[net.n-1]; + int j; + + cv_images = (mat_cv**)xcalloc(avg_frames, sizeof(mat_cv)); + + int i; + for (i = 0; i < net.n; ++i) { + layer lc = net.layers[i]; + if (lc.type == YOLO) { + lc.mean_alpha = 1.0 / avg_frames; + l = lc; + } + } + + if (l.classes != demo_classes) { + printf("\n Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes); + getchar(); + exit(0); + } + + flag_exit = 0; + + custom_thread_t fetch_thread = NULL; + custom_thread_t detect_thread = NULL; + if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + + fetch_in_thread_sync(0); //fetch_in_thread(0); + det_img = in_img; + det_s = in_s; + + fetch_in_thread_sync(0); //fetch_in_thread(0); + detect_in_thread_sync(0); //fetch_in_thread(0); + det_img = in_img; + det_s = in_s; + + for (j = 0; j < avg_frames / 2; ++j) { + free_detections(dets, nboxes); + fetch_in_thread_sync(0); //fetch_in_thread(0); + detect_in_thread_sync(0); //fetch_in_thread(0); + det_img = in_img; + det_s = in_s; + } + + int count = 0; + if(!prefix && !dont_show){ + int full_screen = 0; + //create_window_cv("Demo", full_screen, 1352, 1013); + } + + + write_cv* output_video_writer = NULL; + if (out_filename && !flag_exit) + { + int src_fps = 25; + src_fps = get_stream_fps_cpp_cv(cap); + output_video_writer = + create_video_writer(out_filename, 'D', 'I', 'V', 'X', src_fps, get_width_mat(det_img), get_height_mat(det_img), 1); + + //'H', '2', '6', '4' + //'D', 'I', 'V', 'X' + //'M', 'J', 'P', 'G' + //'M', 'P', '4', 'V' + //'M', 'P', '4', '2' + //'X', 'V', 'I', 'D' + //'W', 'M', 'V', '2' + } + + int send_http_post_once = 0; + const double start_time_lim = get_time_point(); + double before = get_time_point(); + double start_time = get_time_point(); + float avg_fps = 0; + int frame_counter = 0; + int global_frame_counter = 0; + + Streamer streamer; + int src_frame_width = get_width_mat(det_img); + int src_frame_height = get_height_mat(det_img); + int src_fps = get_stream_fps_cpp_cv(cap); + printf("video info w = %d, h = %d, fps = %d\n", src_frame_width, src_frame_height, src_fps); + + if (!dst_stream_addr) { + fprintf(stderr, "Please input a valid stream address \n"); + exit(1); + } + if (!dst_frame_width) dst_frame_width = src_frame_width; + if (!dst_frame_height) dst_frame_height = src_frame_height; + if (!stream_bitrate) stream_bitrate = 500000; + if (!stream_fps) stream_fps = src_fps; + //if (!stream_profile) stream_profile = "high444"; + if (!stream_gop_size) stream_gop_size = 10; + + StreamerConfig streamer_config(src_frame_width, src_frame_height, + dst_frame_width, dst_frame_height, + stream_fps, stream_bitrate, stream_gop_size, stream_profile, dst_stream_addr); + + streamer.enable_av_debug_log(); + 
streamer.init(streamer_config); + printf("stream info w = %d, h = %d, fps = %d, bitrate = %d, profile = %s, gop = %d, address = %s\n", dst_frame_width, dst_frame_height, stream_fps, stream_bitrate, stream_profile, stream_gop_size, dst_stream_addr); + + size_t streamed_frames = 0; + + high_resolution_clock clk; + time_point time_start = clk.now(); + time_point time_prev = time_start; + + MovingAverage moving_average(10); + double avg_frame_time; + + cv::Mat proc_frame; + + time_point time_stop = clk.now(); + auto elapsed_time = std::chrono::duration_cast>(time_stop - time_start); + auto frame_time = std::chrono::duration_cast>(time_stop - time_prev); + + while(1){ + ++count; + { + const float nms = .45; // 0.4F + int local_nboxes = nboxes; + detection *local_dets = dets; + this_thread_yield(); + + if (!benchmark) custom_atomic_store_int(&run_fetch_in_thread, 1); // if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + custom_atomic_store_int(&run_detect_in_thread, 1); // if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + + //if (nms) do_nms_obj(local_dets, local_nboxes, l.classes, nms); // bad results + if (nms) { + if (l.nms_kind == DEFAULT_NMS) do_nms_sort(local_dets, local_nboxes, l.classes, nms); + else diounms_sort(local_dets, local_nboxes, l.classes, nms, l.nms_kind, l.beta_nms); + } + + if (l.embedding_size) set_track_id(local_dets, local_nboxes, demo_thresh, l.sim_thresh, l.track_ciou_norm, l.track_history_size, l.dets_for_track, l.dets_for_show); + + //printf("\033[2J"); + //printf("\033[1;1H"); + //printf("\nFPS:%.1f\n", fps); + printf("Objects:\n\n"); + + ++frame_id; + if (demo_json_port > 0) { + int timeout = 400000; + send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); + } + + //char *http_post_server = "webhook.site/898bbd9b-0ddd-49cf-b81d-1f56be98d870"; + if (http_post_host && !send_http_post_once) { + int timeout = 3; // 3 seconds + int http_post_port = 80; // 443 https, 80 http + if (send_http_post_request(http_post_host, http_post_port, filename, + local_dets, nboxes, classes, names, frame_id, ext_output, timeout)) + { + if (time_limit_sec > 0) send_http_post_once = 1; + } + } + + if (!benchmark && !dontdraw_bbox) draw_detections_cv_v3(show_img, local_dets, local_nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes, demo_ext_output); + free_detections(local_dets, local_nboxes); + + printf("\nFPS:%.1f \t AVG_FPS:%.1f\n", fps, avg_fps); + + if(!prefix){ + if (!dont_show) { + const int each_frame = max_val_cmp(1, avg_fps / 60); + if(global_frame_counter % each_frame == 0){ //show_image_mat(show_img, "Demo"); + process_frame(show_img, proc_frame); + if(!filename){ + stream_frame(streamer, proc_frame); + }else{ + stream_frame(streamer, proc_frame, frame_time.count()*streamer.inv_stream_timebase); + } + } + int c = wait_key_cv(1); + if (c == 10) { + if (frame_skip == 0) frame_skip = 60; + else if (frame_skip == 4) frame_skip = 0; + else if (frame_skip == 60) frame_skip = 4; + else frame_skip = 0; + } + else if (c == 27 || c == 1048603) // ESC - exit (OpenCV 2.x / 3.x) + { + flag_exit = 1; + } + } + }else{ + char buff[256]; + sprintf(buff, "%s_%08d.jpg", prefix, count); + if(show_img) save_cv_jpg(show_img, buff); + } + + // if you run it with param -mjpeg_port 8090 then open URL in your web-browser: http://localhost:8090 + if (mjpeg_port > 0 && show_img) { + int port = mjpeg_port; + int timeout = 400000; + int jpeg_quality = 40; // 1 - 
100 + send_mjpeg(show_img, port, timeout, jpeg_quality); + } + + // save video file + if (output_video_writer && show_img) { + write_frame_cv(output_video_writer, show_img); + printf("\n cvWriteFrame \n"); + } + + while (custom_atomic_load_int(&run_detect_in_thread)) { + if(avg_fps > 180) this_thread_yield(); + else this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0); + } + if (!benchmark) { + while (custom_atomic_load_int(&run_fetch_in_thread)) { + if (avg_fps > 180) this_thread_yield(); + else this_thread_sleep_for(thread_wait_ms); // custom_join(fetch_thread, 0); + } + free_image(det_s); + } + + if (time_limit_sec > 0 && (get_time_point() - start_time_lim)/1000000 > time_limit_sec) { + printf(" start_time_lim = %f, get_time_point() = %f, time spent = %f \n", start_time_lim, get_time_point(), get_time_point() - start_time_lim); + break; + } + + if (flag_exit == 1) break; + + if(delay == 0){ + if(!benchmark) release_mat(&show_img); + show_img = det_img; + } + det_img = in_img; + det_s = in_s; + } + --delay; + if(delay < 0){ + delay = frame_skip; + + //double after = get_wall_time(); + //float curr = 1./(after - before); + double after = get_time_point(); // more accurate time measurements + float curr = 1000000. / (after - before); + fps = fps*0.9 + curr*0.1; + before = after; + + float spent_time = (get_time_point() - start_time) / 1000000; + frame_counter++; + global_frame_counter++; + if (spent_time >= 3.0f) { + //printf(" spent_time = %f \n", spent_time); + avg_fps = frame_counter / spent_time; + frame_counter = 0; + start_time = get_time_point(); + } + } + time_stop = clk.now(); + elapsed_time = std::chrono::duration_cast>(time_stop - time_start); + frame_time = std::chrono::duration_cast>(time_stop - time_prev); + + streamed_frames++; + moving_average.add_value(frame_time.count()); + avg_frame_time = moving_average.get_average(); + add_delay(streamed_frames, stream_fps, elapsed_time.count(), avg_frame_time); + + //ok = video_capture.read(read_frame); + time_prev = time_stop; + + } + printf("input video stream closed. \n"); + if (output_video_writer) { + release_video_writer(&output_video_writer); + printf("output_video_writer closed. 
\n"); + } + + this_thread_sleep_for(thread_wait_ms); + + custom_join(detect_thread, 0); + custom_join(fetch_thread, 0); + + // free memory + free_image(in_s); + free_detections(dets, nboxes); + + demo_index = (avg_frames + demo_index - 1) % avg_frames; + for (j = 0; j < avg_frames; ++j) { + release_mat(&cv_images[j]); + } + free(cv_images); + + free_ptrs((void **)names, net.layers[net.n - 1].classes); + + const int nsize = 8; + for (j = 0; j < nsize; ++j) { + for (i = 32; i < 127; ++i) { + free_image(alphabet[j][i]); + } + free(alphabet[j]); + } + free(alphabet); + free_network(net); + //cudaProfilerStop(); +} +#else +void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, + int benchmark, int benchmark_layers, + int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif diff --git a/src/stream.h b/src/stream.h new file mode 100644 index 00000000000..df186f94d07 --- /dev/null +++ b/src/stream.h @@ -0,0 +1,18 @@ +#ifndef STREAM_H +#define STREAM_H + +#include "image.h" +#ifdef __cplusplus +extern "C" { +#endif + +void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, + int benchmark, int benchmark_layers, + int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/streamer.cpp b/src/streamer.cpp new file mode 100644 index 00000000000..bc1de65bce7 --- /dev/null +++ b/src/streamer.cpp @@ -0,0 +1,262 @@ +#include "streamer.hpp" + +#include +#include +#include +#include +#include + +#define __STDC_CONSTANT_MACROS +#ifdef _WIN32 +//Windows +extern "C" +{ +#include "libavformat/avformat.h" +#include "libavutil/mathematics.h" +#include "libavutil/time.h" +}; +#else +//Linux... 
+#ifdef __cplusplus +extern "C" +{ +#endif +#include +#include +#include +#ifdef __cplusplus +}; +#endif +#endif + + +namespace streamer +{ + +#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P + +static int encode_and_write_frame(AVCodecContext *codec_ctx, AVFormatContext *fmt_ctx, AVFrame *frame) +{ + AVPacket pkt = {0}; + av_init_packet(&pkt); + + int ret = avcodec_send_frame(codec_ctx, frame); + if (ret < 0) + { + fprintf(stderr, "Error sending frame to codec context!\n"); + return ret; + } + + ret = avcodec_receive_packet(codec_ctx, &pkt); + if (ret < 0) + { + fprintf(stderr, "Error receiving packet from codec context!\n" ); + return ret; + } + + av_interleaved_write_frame(fmt_ctx, &pkt); + av_packet_unref(&pkt); + + return 0; +} + + +static int set_options_and_open_encoder(AVFormatContext *fctx, AVStream *stream, AVCodecContext *codec_ctx, AVCodec *codec, + std::string codec_profile, double width, double height, + int fps, int bitrate, int gop_size, AVCodecID codec_id) +{ + const AVRational dst_fps = {fps, 1}; + + codec_ctx->codec_tag = 0; + codec_ctx->codec_id = codec_id; + codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO; + codec_ctx->width = width; + codec_ctx->height = height; + //codec_ctx->gop_size = 12; + codec_ctx->gop_size = gop_size; + codec_ctx->pix_fmt = STREAM_PIX_FMT; + codec_ctx->framerate = dst_fps; + codec_ctx->time_base = av_inv_q(dst_fps); + codec_ctx->bit_rate = bitrate; + if (fctx->oformat->flags & AVFMT_GLOBALHEADER) + { + codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + stream->time_base = codec_ctx->time_base; //will be set afterwards by avformat_write_header to 1/1000 + + int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx); + if (ret < 0) + { + fprintf(stderr, "Could not initialize stream codec parameters!\n"); + return 1; + } + + AVDictionary *codec_options = nullptr; + av_dict_set(&codec_options, "profile", codec_profile.c_str(), 0); + av_dict_set(&codec_options, "preset", "ultrafast", 0); + av_dict_set(&codec_options, "tune", "zerolatency", 0); + av_dict_set(&codec_options, "crf", "30", 0); + //av_dict_set(&codec_options, "g", "1", 0); + //av_dict_set(&codec_options, "ar", "44100", 0); + //av_dict_set(&codec_options, "strict", "-2", 0); + //av_dict_set(&codec_options, "-ac", "1", 0); + av_dict_set(&codec_options, "q", "10", 0); + + // open video encoder + ret = avcodec_open2(codec_ctx, codec, &codec_options); + if (ret < 0) + { + fprintf(stderr, "Could not open video encoder!\n"); + return 1; + } + av_dict_free(&codec_options); + return 0; +} + + +Streamer::Streamer() +{ + format_ctx = nullptr; + out_codec = nullptr; + out_stream = nullptr; + out_codec_ctx = nullptr; + rtmp_server_conn = false; + av_register_all(); + inv_stream_timebase = 30.0; + network_init_ok = !avformat_network_init(); +} + + +void Streamer::cleanup() +{ + if(out_codec_ctx) { + avcodec_close(out_codec_ctx); + avcodec_free_context(&out_codec_ctx); + } + + if(format_ctx) { + if(format_ctx->pb) { + avio_close(format_ctx->pb); + } + avformat_free_context(format_ctx); + format_ctx = nullptr; + } +} + + +Streamer::~Streamer() +{ + cleanup(); + avformat_network_deinit(); +} + + + +void Streamer::stream_frame(const uint8_t *data) +{ + if(can_stream()) { + const int stride[] = {static_cast(config.src_width*3)}; + sws_scale(scaler.ctx, &data, stride, 0, config.src_height, picture.frame->data, picture.frame->linesize); + picture.frame->pts += av_rescale_q(1, out_codec_ctx->time_base, out_stream->time_base); + encode_and_write_frame(out_codec_ctx, format_ctx, picture.frame); + } +} + + 
+void Streamer::stream_frame(const uint8_t *data, int64_t frame_duration) +{ + if(can_stream()) { + const int stride[] = {static_cast(config.src_width*3)}; + sws_scale(scaler.ctx, &data, stride, 0, config.src_height, picture.frame->data, picture.frame->linesize); + picture.frame->pts += frame_duration; //time of frame in milliseconds + encode_and_write_frame(out_codec_ctx, format_ctx, picture.frame); + } +} + + +void Streamer::enable_av_debug_log() +{ + //av_log_set_level(AV_LOG_DEBUG); + //av_log_set_level(AV_LOG_QUIET); + //av_log_set_level(AV_LOG_INFO); + av_log_set_level(AV_LOG_VERBOSE); + //av_log_set_level(AV_LOG_MAX_OFFSET); + //av_log_set_level(AV_LOG_TRACE); +} + + +int Streamer::init(const StreamerConfig &streamer_config) +{ + init_ok = false; + cleanup(); + + config = streamer_config; + + if(!network_init_ok) { + return 1; + } + + //initialize format context for output with flv and no filename + avformat_alloc_output_context2(&format_ctx, nullptr, "flv", nullptr); + if(!format_ctx) { + return 1; + } + + //AVIOContext for accessing the resource indicated by url + if (!(format_ctx->oformat->flags & AVFMT_NOFILE)) { + int avopen_ret = avio_open2(&format_ctx->pb, config.server.c_str(), + AVIO_FLAG_WRITE, nullptr, nullptr); + if (avopen_ret < 0) { + fprintf(stderr, "failed to open stream output context, stream will not work\n"); + return 1; + } + rtmp_server_conn = true; + } + + //use selected codec + AVCodecID codec_id = AV_CODEC_ID_H264; + out_codec = avcodec_find_encoder(codec_id); + if (!(out_codec)) { + fprintf(stderr, "Could not find encoder for '%s'\n", + avcodec_get_name(codec_id)); + return 1; + } + + out_stream = avformat_new_stream(format_ctx, out_codec); + if (!out_stream) { + fprintf(stderr, "Could not allocate stream\n"); + return 1; + } + + out_codec_ctx = avcodec_alloc_context3(out_codec); + + if(set_options_and_open_encoder(format_ctx, out_stream, out_codec_ctx, out_codec, config.profile, + config.dst_width, config.dst_height, config.fps, config.bitrate, config.gop_size, codec_id)) { + return 1; + } + + out_stream->codecpar->extradata_size = out_codec_ctx->extradata_size; + out_stream->codecpar->extradata = static_cast(av_mallocz(out_codec_ctx->extradata_size)); + memcpy(out_stream->codecpar->extradata, out_codec_ctx->extradata, out_codec_ctx->extradata_size); + + av_dump_format(format_ctx, 0, config.server.c_str(), 1); + + picture.init(out_codec_ctx->pix_fmt, config.dst_width, config.dst_height); + scaler.init(out_codec_ctx, config.src_width, config.src_height,config.dst_width, config.dst_height, SWS_BILINEAR); + + if (avformat_write_header(format_ctx, nullptr) < 0) + { + fprintf(stderr, "Could not write header!\n"); + return 1; + } + + printf("stream time base = %d / %d \n", out_stream->time_base.num, out_stream->time_base.den); + + inv_stream_timebase = (double)out_stream->time_base.den/(double)out_stream->time_base.num; + + init_ok = true; + return 0; +} + +} // namespace streamer diff --git a/src/streamer.hpp b/src/streamer.hpp new file mode 100644 index 00000000000..b3ae5f540d8 --- /dev/null +++ b/src/streamer.hpp @@ -0,0 +1,199 @@ +#ifndef STREAMER_HPP +#define STREAMER_HPP + +#ifdef _WIN32 +//Windows +extern "C" { +#include "libavutil/opt.h" +#include "libavcodec/avcodec.h" +#include "libavutil/channel_layout.h" +#include "libavutil/common.h" +#include "libavutil/imgutils.h" +#include "libavutil/mathematics.h" +#include "libavutil/samplefmt.h" + +#include "libavformat/avformat.h" +#include "libavcodec/avcodec.h" +#include "libavutil/imgutils.h" +#include 
"libswscale/swscale.h" +}; +#else +//Linux... +#ifdef __cplusplus +extern "C" { +#endif +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#ifdef __cplusplus +}; +#endif +#endif + +#include + + +namespace streamer +{ + + +class Scaler +{ +public: + SwsContext *ctx; + + Scaler() + { + ctx = nullptr; + } + + ~Scaler() + { + if(ctx) { + sws_freeContext(ctx); + } + } + + int init(AVCodecContext *codec_ctx, int src_width, int src_height, int dst_width, int dst_height, int flags) + { + ctx = sws_getContext(src_width, src_height, AV_PIX_FMT_BGR24, dst_width, dst_height, + codec_ctx->pix_fmt, flags, nullptr, nullptr, nullptr); + if(!ctx) { + fprintf(stderr, "Could not initialize sample scaler!\n"); + return 1; + } + return 0; + } +}; + + + +class Picture +{ + static const int align_frame_buffer = 32; +public: + + AVFrame *frame; + uint8_t *data; + + int init(enum AVPixelFormat pix_fmt, int width, int height) + { + frame = nullptr; + data = nullptr; + frame = av_frame_alloc(); + + int sz = av_image_get_buffer_size(pix_fmt, width, height, align_frame_buffer); + int ret = posix_memalign(reinterpret_cast(&data), align_frame_buffer, sz); + + av_image_fill_arrays(frame->data, frame->linesize, data, pix_fmt, width, height, align_frame_buffer); + frame->format = pix_fmt; + frame->width = width; + frame->height = height; + + return ret; + } + + Picture() + { + frame = nullptr; + data = nullptr; + } + + + ~Picture() + { + if(data) { + free(data); + data = nullptr; + } + + if(frame) { + av_frame_free(&frame); + } + } +}; + + +struct StreamerConfig +{ + int src_width; + int src_height; + int dst_width; + int dst_height; + int fps; + int bitrate; + int gop_size; + std::string profile; + std::string server; + + StreamerConfig() + { + dst_width = 0; + dst_height = 0; + src_width = 0; + src_height = 0; + fps = 0; + bitrate = 0; + gop_size = 12; + } + + StreamerConfig(int source_width, int source_height, int stream_width, int stream_height, int stream_fps, int stream_bitrate, int stream_gop_size, + const std::string &stream_profile, + const std::string &stream_server) + { + src_width = source_width; + src_height = source_height; + dst_width = stream_width; + dst_height = stream_height; + fps = stream_fps; + bitrate = stream_bitrate; + gop_size = stream_gop_size; + profile = stream_profile; + server = stream_server; + } +}; + + +class Streamer +{ + bool network_init_ok; + bool rtmp_server_conn; + bool init_ok; + + AVFormatContext *format_ctx; + AVCodec *out_codec; + AVStream *out_stream; + AVCodecContext *out_codec_ctx; + + Scaler scaler; + Picture picture; + + void cleanup(); + bool can_stream() + { + return network_init_ok && rtmp_server_conn && init_ok; + } + +public: + double inv_stream_timebase; + StreamerConfig config; + Streamer(); + ~Streamer(); + void enable_av_debug_log(); + int init(const StreamerConfig &streamer_config); + void stream_frame(const uint8_t *data); + void stream_frame(const uint8_t *data, int64_t frame_duration); + +}; + +} // namespace streamer +#endif From f24a5fe38ce29295ba9a125738a41086330cd0e1 Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Fri, 25 Sep 2020 10:19:41 +0800 Subject: [PATCH 02/20] add RTMP stream function with FFMPEG --- Makefile | 4 ++-- src/stream.cpp | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 102ea08d6c8..d2af5a0312c 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ GPU=0 CUDNN=0 CUDNN_HALF=0 -OPENCV=0 +OPENCV=1 AVX=0 OPENMP=0 
-LIBSO=0 +LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 diff --git a/src/stream.cpp b/src/stream.cpp index 0a60b2e61f1..ff68d02d12b 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -245,7 +245,7 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in demo_thresh = thresh; demo_ext_output = ext_output; demo_json_port = json_port; - printf("Demo\n"); + printf("Stream\n"); net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 if(weightfile){ load_weights(&net, weightfile); @@ -315,11 +315,12 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in } int count = 0; + /* if(!prefix && !dont_show){ int full_screen = 0; //create_window_cv("Demo", full_screen, 1352, 1013); } - + */ write_cv* output_video_writer = NULL; if (out_filename && !flag_exit) @@ -575,6 +576,6 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in int benchmark, int benchmark_layers, int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps) { - fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); + fprintf(stderr, "Stream needs OpenCV for webcam images.\n"); } #endif From 8437b6cb8f97083c4f24de29e0aa620e2c3c2b32 Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Fri, 25 Sep 2020 11:52:02 +0800 Subject: [PATCH 03/20] add RTMP stream function with FFMPEG --- Makefile | 15 +++++++++++++-- src/detector.c | 8 +++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index d2af5a0312c..485e8c4e58b 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ OPENMP=0 LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 +STREAM=1 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -60,7 +61,7 @@ APPNAMESO=uselib endif ifeq ($(USE_CPP), 1) -CC=g++ -std=c++11 +CC=g++ else CC=gcc endif @@ -72,6 +73,13 @@ LDFLAGS= -lm -pthread -L/usr/local/lib -L/usr/local/Cellar/ffmpeg/4.1.3/lib -lav COMMON= -Iinclude/ -I3rdparty/stb/include -I/usr/local/include -I/usr/local/Cellar/ffmpeg/4.1.3/include CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas #-fPIC -lavformat -lavcodec -lavutil -lswscale +ifeq ($(STREAM), 1) +COMMON+= -DSTREAM +CFLAGS+= -DSTREAM +LDFLAGS+= `pkg-config --libs libavformat libavcodec libavutil libswscale 2>/dev/null` +COMMON+= `pkg-config --cflags libavformat libavcodec libavutil libswscale 2>/dev/null` +endif + ifeq ($(DEBUG), 1) #OPTS= -O0 -g #OPTS= -Og -g @@ -143,11 +151,14 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=stream.o streamer.o image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=image_opencv.o http_stream.o gemm.o utils.o 
dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o endif +ifeq ($(STREAM), 1) +OBJ+=stream.o streamer.o +endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) DEPS = $(wildcard src/*.h) Makefile include/darknet.h diff --git a/src/detector.c b/src/detector.c index 4f05e0077ee..4e0ae6fd06d 100644 --- a/src/detector.c +++ b/src/detector.c @@ -8,7 +8,10 @@ #include "box.h" #include "demo.h" #include "option_list.h" + +#ifdef STREAM #include "stream.h" +#endif #ifndef __COMPAR_FN_T #define __COMPAR_FN_T @@ -2033,11 +2036,14 @@ void run_detector(int argc, char **argv) if (0 == strcmp(argv[2], "demo")){ demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); - }else if (0 == strcmp(argv[2], "stream")){ + } +#ifdef STREAM + if (0 == strcmp(argv[2], "stream")){ stream(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers, stream_bitrate, stream_addr, stream_frame_width, stream_frame_height, stream_profile, stream_gop_size, stream_fps); } +#endif free_list_contents_kvp(options); free_list(options); } From 9009113daa81ae739fc1363e7a4e00a22f81a67c Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Fri, 25 Sep 2020 11:54:40 +0800 Subject: [PATCH 04/20] minor fix --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 485e8c4e58b..484d9bff74b 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,13 @@ GPU=0 CUDNN=0 CUDNN_HALF=0 -OPENCV=1 +OPENCV=0 AVX=0 OPENMP=0 -LIBSO=1 +LIBSO=0 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 -STREAM=1 +STREAM=0 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher From 0c0befc96f89e75adf0daa696ddc996298855087 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 28 Sep 2020 11:49:07 +0800 Subject: [PATCH 05/20] minor fix --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 484d9bff74b..46d88624add 100644 --- a/Makefile +++ b/Makefile @@ -172,8 +172,7 @@ $(LIBNAMESO): $(OBJDIR) $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp $(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ 
$(LDFLAGS) $(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp - #$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) - $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ $(LIBNAMESO) + $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) endif $(EXEC): $(OBJS) From 3aef7cfb2abec7b198a0f47ad73908513dc35112 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 28 Sep 2020 13:59:58 +0800 Subject: [PATCH 06/20] minor fix --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 46d88624add..dc0cd56a088 100644 --- a/Makefile +++ b/Makefile @@ -69,9 +69,9 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS= -lm -pthread -L/usr/local/lib -L/usr/local/Cellar/ffmpeg/4.1.3/lib -lavformat -lavcodec -lavutil -lswscale -COMMON= -Iinclude/ -I3rdparty/stb/include -I/usr/local/include -I/usr/local/Cellar/ffmpeg/4.1.3/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas #-fPIC -lavformat -lavcodec -lavutil -lswscale +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -I3rdparty/stb/include +CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC ifeq ($(STREAM), 1) COMMON+= -DSTREAM From 0d0225faaef18eef30b3ab67386fce6f7eb4bb5f Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 28 Sep 2020 18:33:19 +0800 Subject: [PATCH 07/20] add reference --- src/streamer.cpp | 1 + src/streamer.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/streamer.cpp b/src/streamer.cpp index bc1de65bce7..615352ecb4c 100644 --- a/src/streamer.cpp +++ b/src/streamer.cpp @@ -1,3 +1,4 @@ +//from https://github.com/andreanobile/opencv_ffmpeg_streaming/ #include "streamer.hpp" #include diff --git a/src/streamer.hpp b/src/streamer.hpp index b3ae5f540d8..377cbd193d7 100644 --- a/src/streamer.hpp +++ b/src/streamer.hpp @@ -1,3 +1,4 @@ +// from https://github.com/andreanobile/opencv_ffmpeg_streaming #ifndef STREAMER_HPP #define STREAMER_HPP From 7703fac445d7534b9940492b1a84fc8aa4fa37a5 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 8 Dec 2020 15:43:58 +0800 Subject: [PATCH 08/20] reduce latency of input stream from IP camera --- Makefile | 27 +++-- src/demo.c | 29 +++++- src/detector.c | 4 + src/image_ffmpeg.cpp | 240 +++++++++++++++++++++++++++++++++++++++++++ src/image_ffmpeg.h | 24 +++++ src/stream.cpp | 26 ++++- 6 files changed, 334 insertions(+), 16 deletions(-) create mode 100644 src/image_ffmpeg.cpp create mode 100644 src/image_ffmpeg.h diff --git a/Makefile b/Makefile index dc4b9cb9102..886df70cf8a 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,14 @@ -GPU=0 -CUDNN=0 +GPU=1 +CUDNN=1 CUDNN_HALF=0 -OPENCV=0 +OPENCV=1 AVX=0 OPENMP=0 -LIBSO=0 +LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 STREAM=0 +FFMPEG=1 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -16,7 +17,7 @@ STREAM=0 # set ZED_CAMERA_v2_8=1 to enable ZED SDK 2.X USE_CPP=0 -DEBUG=0 +DEBUG=1 ARCH= -gencode arch=compute_30,code=sm_30 \ -gencode arch=compute_35,code=sm_35 \ @@ -72,9 +73,9 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS= -lm -pthread +LDFLAGS=-L/usr/local/lib -lm -pthread COMMON= -Iinclude/ -I3rdparty/stb/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC +CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -fpermissive ifeq 
($(STREAM), 1) COMMON+= -DSTREAM @@ -83,8 +84,15 @@ LDFLAGS+= `pkg-config --libs libavformat libavcodec libavutil libswscale 2>/dev/ COMMON+= `pkg-config --cflags libavformat libavcodec libavutil libswscale 2>/dev/null` endif +ifeq ($(FFMPEG), 1) +COMMON+= -DFFMPEG +CFLAGS+= -DFFMPEG +LDFLAGS+= `pkg-config --libs libswresample libswscale libavutil libavcodec libavformat 2>/dev/null` +COMMON+= `pkg-config --cflags libswresample libswscale libavutil libavcodec libavformat 2>/dev/null` +endif + ifeq ($(DEBUG), 1) -#OPTS= -O0 -g +OPTS= -O0 -g #OPTS= -Og -g COMMON+= -DDEBUG CFLAGS+= -DDEBUG @@ -162,6 +170,9 @@ endif ifeq ($(STREAM), 1) OBJ+=stream.o streamer.o endif +ifeq ($(FFMPEG), 1) +OBJ+=image_ffmpeg.o +endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) DEPS = $(wildcard src/*.h) Makefile include/darknet.h diff --git a/src/demo.c b/src/demo.c index 604b6990bcf..c7bf4a5debe 100644 --- a/src/demo.c +++ b/src/demo.c @@ -15,6 +15,10 @@ #include #endif +#ifdef FFMPEG +#include "image_ffmpeg.h" +#endif + #ifdef OPENCV #include "http_stream.h" @@ -53,6 +57,9 @@ static const int thread_wait_ms = 1; static volatile int run_fetch_in_thread = 0; static volatile int run_detect_in_thread = 0; +#ifdef FFMPEG +static int input_is_stream = 0; +#endif void *fetch_in_thread(void *ptr) { @@ -62,10 +69,16 @@ void *fetch_in_thread(void *ptr) this_thread_yield(); } int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream - if (letter_box) + if (letter_box){ in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); - else + }else{ +#ifdef FFMPEG + if (input_is_stream) in_s = get_image_from_ffmpeg_stream_resize(&in_img, net.w, net.h, net.c); + else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#endif + } if (!in_s.data) { printf("Stream closed.\n"); custom_atomic_store_int(&flag_exit, 1); @@ -107,13 +120,13 @@ void *detect_in_thread(void *ptr) dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box else dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized - + //const float nms = .45; //if (nms) { // if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); // else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); //} - + custom_atomic_store_int(&run_detect_in_thread, 0); } @@ -165,6 +178,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(filename){ printf("video file: %s\n", filename); +#ifdef FFMPEG + open_video_stream(filename); + input_is_stream = 1; +#endif cap = get_capture_video_stream(filename); }else{ printf("Webcam index: %d\n", cam_index); @@ -335,7 +352,6 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int write_frame_cv(output_video_writer, show_img); printf("\n cvWriteFrame \n"); } - while (custom_atomic_load_int(&run_detect_in_thread)) { if(avg_fps > 180) this_thread_yield(); else this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0); @@ -383,6 +399,9 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int start_time = get_time_point(); } } +#ifdef FFMPEG + av_pkt_unref(); +#endif } printf("input video stream closed. 
\n"); if (output_video_writer) { diff --git a/src/detector.c b/src/detector.c index fecd4a500e0..88a04acecb7 100644 --- a/src/detector.c +++ b/src/detector.c @@ -13,6 +13,10 @@ #include "stream.h" #endif +#ifdef FFMPEG +#include "image_ffmpeg.h" +#endif + #ifndef __COMPAR_FN_T #define __COMPAR_FN_T typedef int (*__compar_fn_t)(const void*, const void*); diff --git a/src/image_ffmpeg.cpp b/src/image_ffmpeg.cpp new file mode 100644 index 00000000000..03f33dcf5eb --- /dev/null +++ b/src/image_ffmpeg.cpp @@ -0,0 +1,240 @@ +#include "image.h" +#include +#include +#include "darknet.h" + +#include "image_opencv.h" +#include "image_ffmpeg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "libavcodec/avcodec.h" +#include "libavdevice/avdevice.h" +#include "libavformat/avformat.h" +#include "libavfilter/avfilter.h" +#include "libavutil/avutil.h" +#include "libavutil/time.h" +#include "libswscale/swscale.h" +#include "libavutil/pixdesc.h" + +#ifdef __cplusplus +} +#endif + +#pragma comment(lib, "avformat.lib") +#pragma comment(lib, "avdevice.lib") +#pragma comment(lib, "avcodec.lib") +#pragma comment(lib, "avutil.lib") +#pragma comment(lib, "avfilter.lib") +#pragma comment(lib, "swscale.lib") +#pragma comment(lib, "swresample.lib") +#pragma comment(lib, "postproc.lib") + +using namespace std; +using namespace cv; + +image mat_to_image(cv::Mat mat) +{ + int w = mat.cols; + int h = mat.rows; + int c = mat.channels(); + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)mat.data; + int step = mat.step; + for (int y = 0; y < h; ++y) { + for (int k = 0; k < c; ++k) { + for (int x = 0; x < w; ++x) { + //uint8_t val = mat.ptr(y)[c * x + k]; + //uint8_t val = mat.at(y, x).val[k]; + //im.data[k*w*h + y*w + x] = val / 255.0f; + im.data[k*w*h + y*w + x] = data[y*step + x*c + k] / 255.0f; + } + } + } + return im; +} + +#ifdef __cplusplus +extern "C" { +#endif + +bool nRestart = false; +AVFormatContext *ifmt_ctx = NULL; +AVStream *pVst; +AVCodecContext *pVideoCodecCtx = NULL; +AVCodec *pVideoCodec = NULL; +IplImage *pRgbImg; +AVFrame *pFrame = av_frame_alloc(); +int got_picture; +//uint8_t* buffer_rgb = NULL; +AVFrame *pFrameRGB = av_frame_alloc(); +SwsContext *img_convert_ctx = NULL; +//cv::Mat Img; +AVDictionary *optionsDict = NULL; +char errbuf[64]; +unsigned int i; +AVStream *st = NULL; +AVPacket pkt; +int video_st_index = -1; +int audio_st_index = -1; +int ret; + +#ifdef FFMPEG +void close_stream() +{ + if (NULL != ifmt_ctx) { + avformat_close_input(&ifmt_ctx); + ifmt_ctx = NULL; + } + exit(0); +} + +extern "C" void av_pkt_unref() +{ + av_packet_unref(&pkt); +} + +extern "C" void open_video_stream(const char *filename) +{ + av_register_all(); // Register all codecs and formats so that they can be used. + avformat_network_init(); // Initialization of network components + av_dict_set(&optionsDict, "rtsp_transport", "tcp", 0); //采用tcp传输 ,,如果不设置这个有些rtsp流就会卡着 + av_dict_set(&optionsDict, "stimeout", "2000000", 0); //如果没有设置stimeout + + av_init_packet(&pkt); // initialize packet. + pkt.data = NULL; + pkt.size = 0; + + if ((ret = avformat_open_input(&ifmt_ctx, filename, 0, &optionsDict)) < 0) { // Open the input file for reading. + printf("Could not open input file '%s' (error '%s')\n", filename, av_make_error_string(errbuf, sizeof(errbuf), ret)); + close_stream(); + } + + if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) { // Get information on the input file (number of streams etc.). 
+ printf("Could not open find stream info (error '%s')\n", av_make_error_string(errbuf, sizeof(errbuf), ret)); + close_stream(); + } + + for (i = 0; i < ifmt_ctx->nb_streams; i++) { // dump information + av_dump_format(ifmt_ctx, i, filename, 0); + } + + for (i = 0; i < ifmt_ctx->nb_streams; i++) { // find video stream index + st = ifmt_ctx->streams[i]; + switch (st->codec->codec_type) { + case AVMEDIA_TYPE_AUDIO: audio_st_index = i; break; + case AVMEDIA_TYPE_VIDEO: video_st_index = i; break; + default: break; + } + } + if (-1 == video_st_index) { + printf("No H.264 video stream in the input file\n"); + close_stream(); + } +} + +extern "C" image get_image_from_ffmpeg_stream_resize(mat_cv** in_image, int w, int h, int c) +{ + cv::Mat *mat = NULL; + image empty_im = make_empty_image(0,0,0); + + //cv::Mat Img; + + do{ + ret = av_read_frame(ifmt_ctx, &pkt); // read frames + }while(ret == AVERROR(EAGAIN) || pkt.stream_index != video_st_index); + + if (ret < 0) { + printf("Could not read frame ---(error '%s')\n", av_make_error_string(errbuf, sizeof(errbuf), ret)); + close_stream(); + } + + if (pkt.stream_index == video_st_index) { // video frame + printf("Video Packet size = %d\n", pkt.size); + } + else if (pkt.stream_index == audio_st_index) { // audio frame + printf("Audio Packet size = %d\n", pkt.size); + } + else { + printf("Unknow Packet size = %d\n", pkt.size); + } + + //decode stream + if (!nRestart) + { + pVst = ifmt_ctx->streams[video_st_index]; + pVideoCodecCtx = pVst->codec; + pVideoCodec = avcodec_find_decoder(pVideoCodecCtx->codec_id); + if (pVideoCodec == NULL) + return empty_im; + //pVideoCodecCtx = avcodec_alloc_context3(pVideoCodec); + + if (avcodec_open2(pVideoCodecCtx, pVideoCodec, NULL) < 0) + return empty_im; + nRestart = true; + } + + if (pkt.stream_index == video_st_index) + { + fprintf(stdout, "pkt.size=%d,pkt.pts=%lld, pkt.data=0x%x.\n", pkt.size, pkt.pts, (unsigned int)pkt.data); + int av_result = avcodec_decode_video2(pVideoCodecCtx, pFrame, &got_picture, &pkt); + + if (got_picture) + { + fprintf(stdout, "decode one video frame!\n"); + } + + if (av_result < 0) + { + fprintf(stderr, "decode failed: inputbuf = 0x%x , input_framesize = %d\n", pkt.data, pkt.size); + return empty_im; + } + if (got_picture) + { + int bytes = avpicture_get_size(AV_PIX_FMT_RGB24, pVideoCodecCtx->width, pVideoCodecCtx->height); + uint8_t *buffer_rgb = (uint8_t *)av_malloc(bytes); + avpicture_fill((AVPicture *)pFrameRGB, buffer_rgb, AV_PIX_FMT_RGB24, pVideoCodecCtx->width, pVideoCodecCtx->height); + + img_convert_ctx = sws_getContext(pVideoCodecCtx->width, pVideoCodecCtx->height, pVideoCodecCtx->pix_fmt, + pVideoCodecCtx->width, pVideoCodecCtx->height, AV_PIX_FMT_BGR24, SWS_FAST_BILINEAR, NULL, NULL, NULL); + if (img_convert_ctx == NULL) + { + + printf("can't init convert context!\n"); + return empty_im; + } + sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0, pVideoCodecCtx->height, pFrameRGB->data, pFrameRGB->linesize); + pRgbImg = cvCreateImage(cvSize(pVideoCodecCtx->width, pVideoCodecCtx->height), 8, 3); + + memcpy(pRgbImg->imageData, buffer_rgb, pVideoCodecCtx->width * 3 * pVideoCodecCtx->height); + //image im = ipl_to_image(pRgbImg); + + mat = new cv::Mat(); + *mat = cvarrToMat(pRgbImg); + + //Img = cvarrToMat(pRgbImg); + cvReleaseImage(&pRgbImg); + sws_freeContext(img_convert_ctx); + av_free(buffer_rgb); + } + } + + //*(cv::Mat **)in_image = &Img; + *(cv::Mat **)in_image = mat; + + cv::Mat new_img = cv::Mat(h, w, CV_8UC(c)); + cv::resize(*mat, new_img, new_img.size(), 0, 0, 
cv::INTER_LINEAR); + cv::cvtColor(new_img, new_img, cv::COLOR_RGB2BGR); + image im = mat_to_image(new_img); + + return im; +} + + +#endif // FFMPEG + +#ifdef __cplusplus +} +#endif diff --git a/src/image_ffmpeg.h b/src/image_ffmpeg.h new file mode 100644 index 00000000000..48e574d80f6 --- /dev/null +++ b/src/image_ffmpeg.h @@ -0,0 +1,24 @@ +#ifndef FFMPEG_H +#define FFMPEG_H + +#include "image.h" +#include "matrix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef FFMPEG + +void close_stream(); +void av_pkt_unref(); +void open_video_stream(const char *filename); +image get_image_from_ffmpeg_stream_resize(mat_cv** in_image, int w, int h, int c); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif //FFMPEG_H diff --git a/src/stream.cpp b/src/stream.cpp index ff68d02d12b..3f54edf1c51 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -15,6 +15,10 @@ #include #endif +#ifdef FFMPEG +#include "image_ffmpeg.h" +#endif + #ifdef OPENCV #include "http_stream.h" @@ -69,6 +73,10 @@ static const int thread_wait_ms = 1; static volatile int run_fetch_in_thread = 0; static volatile int run_detect_in_thread = 0; +#ifdef FFMPEG +static int input_is_stream = 0; +#endif + class MovingAverage { int size; @@ -160,10 +168,16 @@ void *fetch_in_thread(void *ptr) this_thread_yield(); } int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream - if (letter_box) + if (letter_box){ in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); - else + }else{ +#ifdef FFMPEG + if (input_is_stream) in_s = get_image_from_ffmpeg_stream_resize(&in_img, net.w, net.h, net.c); + else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#endif + } if (!in_s.data) { printf("Stream closed.\n"); custom_atomic_store_int(&flag_exit, 1); @@ -257,6 +271,10 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in if(filename){ printf("video file: %s\n", filename); +#ifdef FFMPEG + open_video_stream(filename); + input_is_stream = 1; +#endif cap = get_capture_video_stream(filename); }else{ printf("Webcam index: %d\n", cam_index); @@ -534,7 +552,9 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in //ok = video_capture.read(read_frame); time_prev = time_stop; - +#ifdef FFMPEG + av_pkt_unref(); +#endif } printf("input video stream closed. 
\n"); if (output_video_writer) { From 658621fd8f9a3d46775d87b4809ee883ee01d989 Mon Sep 17 00:00:00 2001 From: edwardxliu <44568088+edwardxliu@users.noreply.github.com> Date: Tue, 8 Dec 2020 15:48:53 +0800 Subject: [PATCH 09/20] Update README.md --- README.md | 689 +----------------------------------------------------- 1 file changed, 2 insertions(+), 687 deletions(-) diff --git a/README.md b/README.md index f1e6ab4ec2a..85f86e8f376 100644 --- a/README.md +++ b/README.md @@ -1,687 +1,2 @@ -# Yolo v4, v3 and v2 for Windows and Linux - -## (neural networks for object detection) - -Paper Yolo v4: https://arxiv.org/abs/2004.10934 - -More details: [medium link](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) - -Manual: https://github.com/AlexeyAB/darknet/wiki - -Discussion: - - [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) - - [Google-groups](https://groups.google.com/forum/#!forum/darknet) - - [Discord](https://discord.gg/zSq8rtW) - -About Darknet framework: http://pjreddie.com/darknet/ - -[![Darknet Continuous Integration](https://github.com/AlexeyAB/darknet/workflows/Darknet%20Continuous%20Integration/badge.svg)](https://github.com/AlexeyAB/darknet/actions?query=workflow%3A%22Darknet+Continuous+Integration%22) -[![CircleCI](https://circleci.com/gh/AlexeyAB/darknet.svg?style=svg)](https://circleci.com/gh/AlexeyAB/darknet) -[![TravisCI](https://travis-ci.org/AlexeyAB/darknet.svg?branch=master)](https://travis-ci.org/AlexeyAB/darknet) -[![Contributors](https://img.shields.io/github/contributors/AlexeyAB/Darknet.svg)](https://github.com/AlexeyAB/darknet/graphs/contributors) -[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE) -[![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965) -[![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2004.10934-B31B1B.svg)](https://arxiv.org/abs/2004.10934) -[![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) -[![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) - - -* [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -* [Requirements (and how to install dependecies)](#requirements) -* [Pre-trained models](#pre-trained-models) -* [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) -* [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) -* [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) -* [Datasets](#datasets) - -0. [Improvements in this repository](#improvements-in-this-repository) -1. [How to use](#how-to-use-on-the-command-line) -2. How to compile on Linux - * [Using cmake](#how-to-compile-on-linux-using-cmake) - * [Using make](#how-to-compile-on-linux-using-make) -3. How to compile on Windows - * [Using cmake](#how-to-compile-on-windows-using-cmake) - * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg) - * [Legacy way](#how-to-compile-on-windows-legacy-way) -4. 
[Training and Evaluation of speed and accuracy on MS COCO](https://github.com/AlexeyAB/darknet/wiki#training-and-evaluation-of-speed-and-accuracy-on-ms-coco) -5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu) -6. [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) -7. [How to train tiny-yolo (to detect your custom objects)](#how-to-train-tiny-yolo-to-detect-your-custom-objects) -8. [When should I stop training](#when-should-i-stop-training) -9. [How to improve object detection](#how-to-improve-object-detection) -10. [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) -11. [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) - -![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) - -![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 - - -tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times for batch=4. -* tkDNN: https://github.com/ceccocats/tkDNN -* OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf - -#### GeForce RTX 2080 Ti: -| Network Size | Darknet, FPS (avg)| tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | -|:-----:|:--------:|--------:|--------:|--------:|--------:|--------:|------:| -|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | -|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | -|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | -|608 | 53 | 62 | 103 | **115**| 150 | **150** | **2.8x** | -|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | -|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | - -* Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) -* Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) -* CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks -* Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) -* Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf -* Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg -* Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg - -#### Youtube video of results - -[![Yolo v4](http://img.youtube.com/vi/1_SiUOYUoOI/0.jpg)](https://youtu.be/1_SiUOYUoOI "Yolo v4") - -Others: https://www.youtube.com/user/pjreddie/videos - -#### How to evaluate AP of YOLOv4 on the MS COCO evaluation server - -1. Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip -2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt -3. 
Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) -4. Content of the file `cfg/coco.data` should be - -```ini -classes= 80 -train = /trainvalno5k.txt -valid = /testdev2017.txt -names = data/coco.names -backup = backup -eval=coco -``` - -5. Create `/results/` folder near with `./darknet` executable file -6. Run validation: `./darknet detector valid cfg/coco.data cfg/yolov4.cfg yolov4.weights` -7. Rename the file `/results/coco_results.json` to `detections_test-dev2017_yolov4_results.json` and compress it to `detections_test-dev2017_yolov4_results.zip` -8. Submit file `detections_test-dev2017_yolov4_results.zip` to the MS COCO evaluation server for the `test-dev2019 (bbox)` - -#### How to evaluate FPS of YOLOv4 on GPU - -1. Compile Darknet with `GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=1` in the `Makefile` -2. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) -3. Get any .avi/.mp4 video file (preferably not more than 1920x1080 to avoid bottlenecks in CPU performance) -4. Run one of two commands and look at the AVG FPS: - -* include video_capturing + NMS + drawing_bboxes: - `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -dont_show -ext_output` -* exclude video_capturing + NMS + drawing_bboxes: - `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -benchmark` - -#### Pre-trained models - -There are weights-file for different cfg-files (trained for MS COCO dataset): - -FPS on RTX 2070 (R) and Tesla V100 (V): - -* [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) - just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: - * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops - * `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops - * `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops - * `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops - -* [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) - -* [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) - -* [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - 
OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights) - -
CLICK ME - Yolo v3 models - -* [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) - -* [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) - -* [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) - -* [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) - -* [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) - -* [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing) - -
- -
CLICK ME - Yolo v2 models - -* `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights -* `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights -* `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights -* `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights -* `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights - -
- -Put it near compiled: darknet.exe - -You can get cfg-files by path: `darknet/cfg/` - -### Requirements - -* Windows or Linux -* **CMake >= 3.12**: https://cmake.org/download/ -* **CUDA >= 10.0**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) -* **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) -* **cuDNN >= 7.0** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) -* **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported -* on Linux **GCC or Clang**, on Windows **MSVC 2017/2019** https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community - -#### Yolo v4 in other frameworks - -* **TensorFlow:** YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite - For YOLOv3 - convert `yolov3.weights`/`cfg` files to `yolov3.ckpt`/`pb/meta`: by using [mystic123](https://github.com/mystic123/tensorflow-yolo-v3) project, and [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) -* **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) -* **Intel OpenVINO 2020 R4:** (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) -* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn -* **PyTorch > ONNX**: - * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) - * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) - * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) -* **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ -* **TensorRT** YOLOv4 on TensorRT+tkDNN: https://github.com/ceccocats/tkDNN - For YOLOv3 (-70% faster inference): [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) read [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
[wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) implemented yolov3-spp, yolov4, etc. -* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream -* **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) -* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about -* **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite -* **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron - -#### Datasets - -* MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset -* OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset -* Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets -* ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set) -* German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use this parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task -* List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets - -### Improvements in this repository - -* developed State-of-the-Art object detector YOLOv4 -* added State-of-Art models: CSP, PRN, EfficientNet -* added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] -* added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video -* added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX -* added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) -* improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg -* improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm -* improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` -* improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... 
-* improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta -* improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) -* optimized memory allocation during network resizing when `random=1` -* optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 -* added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... -* added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training -* run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser -* added calculation of anchors for training -* added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp -* run-time tips and warnings if you use incorrect cfg-file or dataset -* added support for Windows -* many other fixes of code... - -And added manual - [How to train Yolo v4-v2 (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) - -Also, you might be interested in using a simplified repository where is implemented INT8-quantization (+30% speedup and -1% mAP reduced): https://github.com/AlexeyAB/yolo2_light - -#### How to use on the command line - -On Linux use `./darknet` instead of `darknet.exe`, like this:`./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` - -On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` - -* Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` -* **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` -* Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` -* Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` -* Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` -* Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` -* Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` -* **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` -* Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` -* Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` -* Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): - `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` -* 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data 
cfg/yolo9000.cfg yolo9000.weights` -* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app -* To process a list of images `data/train.txt` and save results of detection to `result.json` file use: - `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` -* To process a list of images `data/train.txt` and save results of detection to `result.txt` use: - `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` -* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: - `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` -* To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` -* To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` -* To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights -iou_thresh 0.75` - -##### For using network video-camera mjpeg-stream with any Android smartphone - -1. Download for Android phone mjpeg-stream soft: IP Webcam / Smart WebCam - - * Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2 - * IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam - -2. Connect your Android phone to computer by WiFi (through a WiFi-router) or USB -3. Start Smart WebCam on your phone -4. Replace the address below, on shown in the phone application (Smart WebCam) and launch: - -* Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0` - -### How to compile on Linux/macOS (using `CMake`) - -The `CMakeLists.txt` will attempt to find installed optional dependencies like CUDA, cudnn, ZED and build against those. It will also create a shared object library file to use `darknet` for code development. - -Open a shell terminal inside the cloned repository and launch: - -```bash -./build.sh -``` - -### How to compile on Linux (using `make`) - -Just do `make` in the darknet directory. 
(You can try to compile and run it on Google Colab in cloud [link](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) (press «Open in Playground» button at the top-left corner) and watch the video [link](https://www.youtube.com/watch?v=mKAEGSxwOAY) ) -Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1) - -* `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`) -* `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) -* `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x -* `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams -* `DEBUG=1` to bould debug version of Yolo -* `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU -* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp - or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` -* `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run - `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` -* You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a never version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card. - -To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` - -### How to compile on Windows (using `CMake`) - -This is the recommended approach to build Darknet on Windows. - -1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com) - -2. Install CUDA (at least v10.0) enabling VS Integration during installation. - -3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: - -```PowerShell -PS Code\> git clone https://github.com/microsoft/vcpkg -PS Code\> cd vcpkg -PS Code\vcpkg> $env:VCPKG_ROOT=$PWD -PS Code\vcpkg> .\bootstrap-vcpkg.bat -PS Code\vcpkg> .\vcpkg install darknet[full]:x64-windows #replace with darknet[opencv-base,cuda,cudnn]:x64-windows for a quicker install of dependencies -PS Code\vcpkg> cd .. -PS Code\> git clone https://github.com/AlexeyAB/darknet -PS Code\> cd darknet -PS Code\darknet> .\build.ps1 -``` - -## How to train with multi-GPU - -1. Train it first on 1 GPU for like 1000 iterations: `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg yolov4.conv.137` - -2. 
Then stop and by using partially-trained model `/backup/yolov4_1000.weights` run training with multigpu (up to 4 GPUs): `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg /backup/yolov4_1000.weights -gpus 0,1,2,3` - -If you get a Nan, then for some datasets better to decrease learning rate, for 4 GPUs set `learning_rate = 0,00065` (i.e. learning_rate = 0.00261 / GPUs). In this case also increase 4x times `burn_in =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. - -https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ - -## How to train (to detect your custom objects) - -(to train old Yolo v2 `yolov2-voc.cfg`, `yolov2-tiny-voc.cfg`, `yolo-voc.cfg`, `yolo-voc.2.0.cfg`, ... [click by the link](https://github.com/AlexeyAB/darknet/tree/47c7af1cea5bbdedf1184963355e6418cb8b1b4f#how-to-train-pascal-voc-data)) - -Training Yolo v4 (and v3): - -0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) - -1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg)` and: - -* change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3) -* change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) -* change line max_batches to (`classes*2000` but not less than number of training images, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes -* change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22) -* set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9 -* change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers: - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783 -* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. 
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776 -* when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604 - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696 - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789 - -So if `classes=1` then should be `filters=18`. If `classes=2` then write `filters=21`. - -**(Do not write in the cfg-file: filters=(classes + 5)x3)** - -(Generally `filters` depends on the `classes`, `coords` and number of `mask`s, i.e. filters=`(classes + coords + 1)*`, where `mask` is indices of anchors. If `mask` is absence, then filters=`(classes + coords + 1)*num`) - -So for example, for 2 objects, your file `yolo-obj.cfg` should differ from `yolov4-custom.cfg` in such lines in each of **3** [yolo]-layers: - -```ini -[convolutional] -filters=21 - -[region] -classes=2 -``` - -2. Create file `obj.names` in the directory `build\darknet\x64\data\`, with objects names - each in new line - -3. Create file `obj.data` in the directory `build\darknet\x64\data\`, containing (where **classes = number of objects**): - - ```ini - classes = 2 - train = data/train.txt - valid = data/test.txt - names = data/obj.names - backup = backup/ - ``` - -4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\` - -5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark - -It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: - -` ` - - Where: - * `` - integer object number from `0` to `(classes-1)` - * ` ` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` - * for example: ` = / ` or ` = / ` - * atention: ` ` - are center of rectangle (are not top-left corner) - - For example for `img1.jpg` you will be created `img1.txt` containing: - - ``` - 1 0.716797 0.395833 0.216406 0.147222 - 0 0.687109 0.379167 0.255469 0.158333 - 1 0.420312 0.395833 0.140625 0.166667 - ``` - -6. Create file `train.txt` in directory `build\darknet\x64\data\`, with filenames of your images, each filename in new line, with path relative to `darknet.exe`, for example containing: - - ``` - data/obj/img1.jpg - data/obj/img2.jpg - data/obj/img3.jpg - ``` - -7. 
Download pre-trained weights for the convolutional layers and put to the directory `build\darknet\x64` - * for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) - * for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29) - * for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing) - * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74) - * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing) - * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing) - - -8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137` - - To train on Linux use command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`) - - * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations) - * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations) - * (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2) - * (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser) - -8.1. For training with mAP (mean average precisions) calculation for each 4 Epochs (set `valid=valid.txt` or `train.txt` in `obj.data` file) and run: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` - -9. After training is complete - get result `yolo-obj_final.weights` from path `build\darknet\x64\backup\` - - * After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights` - - (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`) - - * Also you can get result earlier than all 45000 iterations. - - **Note:** If during training you see `nan` values for `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well. - - **Note:** If you changed width= or height= in your cfg-file, then new width and height must be divisible by 32. 
- - **Note:** After training use such command for detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - - **Note:** if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) - -### How to train tiny-yolo (to detect your custom objects): - -Do all the same steps as for the full yolo model as described above. With the exception of: -* Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 - (Or get this file from yolov4-tiny.weights file by using command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29` -* Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg` -* Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29` - -For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as showed in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd -If you made you custom model that isn't based on other models, then you can train it without pre-trained weights, then will be used random initial weights. - -## When should I stop training: - -Usually sufficient 2000 iterations for each class(object), but not less than number of training images and not less than 6000 iterations in total. But for a more precise definition when you should stop training, use the following manual: - -1. During training, you will see varying indicators of error, and you should stop when no longer decreases **0.XXXXXXX avg**: - - > Region Avg IOU: 0.798363, Class: 0.893232, Obj: 0.700808, No Obj: 0.004567, Avg Recall: 1.000000, count: 8 - > Region Avg IOU: 0.800677, Class: 0.892181, Obj: 0.701590, No Obj: 0.004574, Avg Recall: 1.000000, count: 8 - > - > **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images - > Loaded: 0.000000 seconds - - * **9002** - iteration number (number of batch) - * **0.60730 avg** - average loss (error) - **the lower, the better** - - When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). - - Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. - -2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: - -For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. 
You should get weights from **Early Stopping Point**: - -![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) - -To get weights from Early Stopping Point: - - 2.1. At first, in your file `obj.data` you must specify the path to the validation dataset `valid = valid.txt` (format of `valid.txt` as in `train.txt`), and if you haven't validation images, just copy `data\train.txt` to `data\valid.txt`. - - 2.2 If training is stopped after 9000 iterations, to validate some of previous weights use this commands: - -(If you use another GitHub repository, then use `darknet.exe detector recall`... instead of `darknet.exe detector map`...) - -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` - -And comapre last output lines for each weights (7000, 8000, 9000): - -Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) - -For example, **bigger mAP** gives weights `yolo-obj_8000.weights` - then **use this weights for detection**. - -Or just train with `-map` flag: - -`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` - -So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calculated for each 4 Epochs using `valid=valid.txt` file that is specified in `obj.data` file (`1 Epoch = images_in_train_txt / batch` iterations) - -(to change the max x-axis value - change [`max_batches=`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) parameter to `2000*classes`, f.e. `max_batches=6000` for 3 classes) - -![loss_chart_map_chart](https://hsto.org/webt/yd/vl/ag/ydvlagutof2zcnjodstgroen8ac.jpeg) - -Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - -* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 - -* **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf - -**mAP** is default metric of precision in the PascalVOC competition, **this is the same as AP50** metric in the MS COCO competition. -In terms of Wiki, indicators Precision and Recall have a slightly different meaning than in the PascalVOC competition, but **IoU always has the same meaning**. - -![precision_recall_iou](https://hsto.org/files/ca8/866/d76/ca8866d76fb840228940dbf442a7f06a.jpg) - - -### Custom object detection: - -Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - -| ![Yolo_v2_training](https://hsto.org/files/d12/1e7/515/d121e7515f6a4eb694913f10de5f2b61.jpg) | ![Yolo_v2_training](https://hsto.org/files/727/c7e/5e9/727c7e5e99bf4d4aa34027bb6a5e4bab.jpg) | -|---|---| - -## How to improve object detection: - -1. 
Before training: - -* set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788) - -* increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision - -* check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark - -* my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. - -* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more - -* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects - -* What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected. - -* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0,0615234375*(width*height)` where are width and height are parameters from `[net]` section in cfg-file) - -* for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895 - * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892 - * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989 - -* for training for both small and large objects use modified models: - * Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg - * Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg - * YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg - -* If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) 
then for disabling flip data augmentation - add `flip=0` here: https://github.com/AlexeyAB/darknet/blob/3d2d0a7c98dbc8923d9ff705b81ff4f7940ea6ff/cfg/yolov3.cfg#L17 - -* General rule - your training dataset should include such a set of relative sizes of objects that you want to detect: - * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` - * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` - - I.e. for each object from Test dataset there must be at least 1 object in the Training dataset with the same class_id and about the same relative size: - - `object width in percent from Training dataset` ~= `object width in percent from Test dataset` - - That is, if only objects that occupied 80-90% of the image were present in the training set, then the trained network will not be able to detect objects that occupy 1-10% of the image. - -* to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file - -* each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. - -* to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5. - -* Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file: -`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` -then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But you should change indexes of anchors `masks=` for each [yolo]-layer, so for YOLOv4 the 1st-[yolo]-layer has anchors smaller than 30x30, 2nd smaller than 60x60, 3rd remaining, and vice versa for YOLOv3. Also you should change the `filters=(classes + 5)*` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors. - -2. 
After training - for detection: - -* Increase network-resolution by set in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9) - -* it is not necessary to train the network again, just use `.weights`-file already trained for 416x416 resolution - -* to get even greater accuracy you should train with higher resolution 608x608 or 832x832, note: if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) - -## How to mark bounded boxes of objects and create annotation files: - -Here you can find repository with GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 - v4: https://github.com/AlexeyAB/Yolo_mark - -With example of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air`1-6`.txt`, `bird`1-4`.txt` for 2 classes of objects (air, bird) and `train_obj.cmd` with example how to train this image-set with Yolo v2 - v4 - -Different tools for marking objects in images: - -1. in C++: https://github.com/AlexeyAB/Yolo_mark -2. in Python: https://github.com/tzutalin/labelImg -3. in Python: https://github.com/Cartucho/OpenLabeling -4. in C++: https://www.ccoderun.ca/darkmark/ -5. in JavaScript: https://github.com/opencv/cvat -6. in C++: https://github.com/jveitchmichaelis/deeplabel -7. in C#: https://github.com/BMW-InnovationLab/BMW-Labeltool-Lite -8. DL-Annotator for Windows ($30): [url](https://www.microsoft.com/en-us/p/dlannotator/9nsx79m7t8fn?activetab=pivot:overviewtab) -9. v7labs - the greatest cloud labeling tool ($1.5 per hour): https://www.v7labs.com/ - -## How to use Yolo as DLL and SO libraries - -* on Linux - * using `build.sh` or - * build `darknet` using `cmake` or - * set `LIBSO=1` in the `Makefile` and do `make` -* on Windows - * using `build.ps1` or - * build `darknet` using `cmake` or - * compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution - -There are 2 APIs: - -* C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h - * Python examples using the C API: - * https://github.com/AlexeyAB/darknet/blob/master/darknet.py - * https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py - -* C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp - * C++ example that uses C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp - ----- - -1. To compile Yolo as C++ DLL-file `yolo_cpp_dll.dll` - open the solution `build\darknet\yolo_cpp_dll.sln`, set **x64** and **Release**, and do the: Build -> Build yolo_cpp_dll - * You should have installed **CUDA 10.0** - * To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;` - -2. 
To use Yolo as DLL-file in your C++ console application - open the solution `build\darknet\yolo_console_dll.sln`, set **x64** and **Release**, and do the: Build -> Build yolo_console_dll - - * you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe` - **use this command**: `yolo_console_dll.exe data/coco.names yolov4.cfg yolov4.weights test.mp4` - - * after launching your console application and entering the image file name - you will see info for each object: - ` ` - * to use simple OpenCV-GUI you should uncomment line `//#define OPENCV` in `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5) - * you can see source code of simple example for detection on the video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75) - -`yolo_cpp_dll.dll`-API: [link](https://github.com/AlexeyAB/darknet/blob/master/src/yolo_v2_class.hpp#L42) - -```cpp -struct bbox_t { - unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box - float prob; // confidence - probability that the object was found correctly - unsigned int obj_id; // class of object - from range [0, classes-1] - unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) - unsigned int frames_counter;// counter of frames on which the object was detected -}; - -class Detector { -public: - Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0); - ~Detector(); - - std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); - std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); - static image_t load_image(std::string image_filename); - static void free_image(image_t m); - -#ifdef OPENCV - std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false); - std::shared_ptr mat_to_image_resize(cv::Mat mat) const; -#endif -}; -``` +Fork from https://github.com/AlexeyAB/darknet/ +Trying to optimizie streaming latency of IP camera and add RTMP streaming function via FFMPEG From 91e3d7d35ff7cbef94954248d952c48b1c7e2f9a Mon Sep 17 00:00:00 2001 From: edwardxliu <44568088+edwardxliu@users.noreply.github.com> Date: Tue, 8 Dec 2020 15:49:20 +0800 Subject: [PATCH 10/20] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 85f86e8f376..5fe6108c582 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ Fork from https://github.com/AlexeyAB/darknet/ + + Trying to optimizie streaming latency of IP camera and add RTMP streaming function via FFMPEG From 3f2d3fa62f135230d4b6a2360686077d4b8032e8 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 14 Dec 2020 20:30:50 +0800 Subject: [PATCH 11/20] input rtsp stream in stream.cpp with ffmpeg --- Makefile | 2 +- src/stream.cpp | 29 ++++++++++++++++------------- src/streamer.cpp | 8 ++++---- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 886df70cf8a..7fb67797801 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ OPENMP=0 LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 -STREAM=0 +STREAM=1 FFMPEG=1 # set GPU=1 and CUDNN=1 to speedup on GPU diff --git a/src/stream.cpp b/src/stream.cpp index 3f54edf1c51..75ac96f1f9e 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -33,12 +33,6 @@ #include #include -using namespace streamer; -using time_point = 
std::chrono::high_resolution_clock::time_point; -using high_resolution_clock = std::chrono::high_resolution_clock; -using std::cerr; -using std::endl; - static char **demo_names; static image **demo_alphabet; static int demo_classes; @@ -77,6 +71,12 @@ static volatile int run_detect_in_thread = 0; static int input_is_stream = 0; #endif +using namespace streamer; +using time_point = std::chrono::high_resolution_clock::time_point; +using high_resolution_clock = std::chrono::high_resolution_clock; +using std::cerr; +using std::endl; + class MovingAverage { int size; @@ -148,7 +148,6 @@ void process_frame(mat_cv *mat_ptr, cv::Mat &out) } } - void stream_frame(Streamer &streamer, const cv::Mat &image) { streamer.stream_frame(image.data); @@ -241,6 +240,7 @@ double get_wall_time() } return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001; } + void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers, @@ -365,6 +365,7 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in int frame_counter = 0; int global_frame_counter = 0; + Streamer streamer; int src_frame_width = get_width_mat(det_img); int src_frame_height = get_height_mat(det_img); @@ -375,11 +376,12 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in fprintf(stderr, "Please input a valid stream address \n"); exit(1); } + if (!dst_frame_width) dst_frame_width = src_frame_width; if (!dst_frame_height) dst_frame_height = src_frame_height; if (!stream_bitrate) stream_bitrate = 500000; if (!stream_fps) stream_fps = src_fps; - //if (!stream_profile) stream_profile = "high444"; + if (!stream_profile) stream_profile = "high444"; if (!stream_gop_size) stream_gop_size = 10; StreamerConfig streamer_config(src_frame_width, src_frame_height, @@ -454,7 +456,8 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in if(!prefix){ if (!dont_show) { const int each_frame = max_val_cmp(1, avg_fps / 60); - if(global_frame_counter % each_frame == 0){ //show_image_mat(show_img, "Demo"); + if(global_frame_counter % each_frame == 0){ + //show_image_mat(show_img, "Demo"); process_frame(show_img, proc_frame); if(!filename){ stream_frame(streamer, proc_frame); @@ -541,6 +544,9 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in start_time = get_time_point(); } } +#ifdef FFMPEG + av_pkt_unref(); +#endif time_stop = clk.now(); elapsed_time = std::chrono::duration_cast>(time_stop - time_start); frame_time = std::chrono::duration_cast>(time_stop - time_prev); @@ -548,13 +554,10 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in streamed_frames++; moving_average.add_value(frame_time.count()); avg_frame_time = moving_average.get_average(); - add_delay(streamed_frames, stream_fps, elapsed_time.count(), avg_frame_time); + //add_delay(streamed_frames, stream_fps, elapsed_time.count(), avg_frame_time); //ok = video_capture.read(read_frame); time_prev = time_stop; -#ifdef FFMPEG - av_pkt_unref(); -#endif } printf("input video stream closed. 
\n"); if (output_video_writer) { diff --git a/src/streamer.cpp b/src/streamer.cpp index 615352ecb4c..0b4c9ffe5dc 100644 --- a/src/streamer.cpp +++ b/src/streamer.cpp @@ -98,10 +98,10 @@ static int set_options_and_open_encoder(AVFormatContext *fctx, AVStream *stream, av_dict_set(&codec_options, "preset", "ultrafast", 0); av_dict_set(&codec_options, "tune", "zerolatency", 0); av_dict_set(&codec_options, "crf", "30", 0); - //av_dict_set(&codec_options, "g", "1", 0); - //av_dict_set(&codec_options, "ar", "44100", 0); - //av_dict_set(&codec_options, "strict", "-2", 0); - //av_dict_set(&codec_options, "-ac", "1", 0); + av_dict_set(&codec_options, "g", "1", 0); + av_dict_set(&codec_options, "ar", "44100", 0); + av_dict_set(&codec_options, "strict", "-2", 0); + av_dict_set(&codec_options, "ac", "1", 0); av_dict_set(&codec_options, "q", "10", 0); // open video encoder From 50660eb3e39bac4acc673d353101c60259086270 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 14 Dec 2020 20:51:34 +0800 Subject: [PATCH 12/20] update --- README.md | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 README.md diff --git a/README.md b/README.md deleted file mode 100644 index 5fe6108c582..00000000000 --- a/README.md +++ /dev/null @@ -1,4 +0,0 @@ -Fork from https://github.com/AlexeyAB/darknet/ - - -Trying to optimizie streaming latency of IP camera and add RTMP streaming function via FFMPEG From 816210a93d496f49fc69e4d66badbfc92c8244a8 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 14 Dec 2020 21:06:07 +0800 Subject: [PATCH 13/20] update --- CMakeLists.txt | 4 +- Makefile | 9 +- README.md | 708 +++++++++++++ build/darknet/x64/cfg/cspx-p7-mish.cfg | 20 +- build/darknet/x64/cfg/yolov4-csp.cfg | 1277 ++++++++++++++++++++++++ build/darknet/x64/cfg/yolov4-tiny.cfg | 17 + build/darknet/x64/cfg/yolov4x-mish.cfg | 13 +- cfg/cspx-p7-mish.cfg | 20 +- cfg/yolov4-csp.cfg | 1277 ++++++++++++++++++++++++ cfg/yolov4x-mish.cfg | 13 +- include/darknet.h | 11 + src/convolutional_layer.c | 4 + src/darknet.c | 2 +- src/demo.c | 1 + src/detector.c | 12 +- src/layer.c | 3 + src/network.c | 184 +++- src/network.h | 5 + src/parser.c | 45 +- src/parser.h | 2 +- src/yolo_layer.c | 182 +++- src/yolo_layer.h | 2 +- 22 files changed, 3730 insertions(+), 81 deletions(-) create mode 100644 README.md create mode 100644 build/darknet/x64/cfg/yolov4-csp.cfg create mode 100644 cfg/yolov4-csp.cfg diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c7224e4e8b..ea7c4a64760 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,7 @@ include(CheckLanguage) check_language(CUDA) if(CMAKE_CUDA_COMPILER AND ENABLE_CUDA) set(CUDA_ARCHITECTURES "Auto" CACHE STRING "\"Auto\" detects local machine GPU compute arch at runtime, \"Common\" and \"All\" cover common and entire subsets of architectures, \"Names\" is a list of architectures to enable by name, \"Numbers\" is a list of compute capabilities (version number) to enable") - set_property(CACHE CUDA_ARCHITECTURES PROPERTY STRINGS "Auto" "Common" "All" "Kepler Maxwell Kepler+Tegra Maxwell+Tegra Pascal" "5.0 7.5") + set_property(CACHE CUDA_ARCHITECTURES PROPERTY STRINGS "Auto" "Common" "All" "Kepler Maxwell Kepler+Tegra Maxwell+Tegra Pascal" "5.0 7.5 8.6") enable_language(CUDA) if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "9.0") message(STATUS "Unsupported CUDA version, please upgrade to CUDA 9+. 
Disabling CUDA support") @@ -71,7 +71,7 @@ if(CMAKE_CUDA_COMPILER AND ENABLE_CUDA) find_package(CUDA REQUIRED) cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_ARCHITECTURES}) message(STATUS "Building with CUDA flags: " "${CUDA_ARCH_FLAGS}") - if (NOT "arch=compute_70,code=sm_70" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_72,code=sm_72" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_75,code=sm_75" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_80,code=sm_80" IN_LIST CUDA_ARCH_FLAGS) + if (NOT "arch=compute_70,code=sm_70" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_72,code=sm_72" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_75,code=sm_75" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_80,code=sm_80" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_86,code=sm_86" IN_LIST CUDA_ARCH_FLAGS) set(ENABLE_CUDNN_HALF "FALSE" CACHE BOOL "Enable CUDNN Half precision" FORCE) message(STATUS "Your setup does not supports half precision (it requires CC >= 7.0)") else() diff --git a/Makefile b/Makefile index 7fb67797801..743404c1ce2 100644 --- a/Makefile +++ b/Makefile @@ -19,14 +19,19 @@ FFMPEG=1 USE_CPP=0 DEBUG=1 -ARCH= -gencode arch=compute_30,code=sm_30 \ - -gencode arch=compute_35,code=sm_35 \ +ARCH= -gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_50,code=[sm_50,compute_50] \ -gencode arch=compute_52,code=[sm_52,compute_52] \ -gencode arch=compute_61,code=[sm_61,compute_61] OS := $(shell uname) +# GeForce RTX 3070, 3080, 3090 +# ARCH= -gencode arch=compute_86,code=[sm_86,compute_86] + +# Kepler GeForce GTX 770, GTX 760, GT 740 +# ARCH= -gencode arch=compute_30,code=sm_30 + # Tesla A100 (GA100), DGX-A100, RTX 3080 # ARCH= -gencode arch=compute_80,code=[sm_80,compute_80] diff --git a/README.md b/README.md new file mode 100644 index 00000000000..203f77c7591 --- /dev/null +++ b/README.md @@ -0,0 +1,708 @@ +# Yolo v4, v3 and v2 for Windows and Linux + +## (neural networks for object detection) + +Paper YOLO v4: https://arxiv.org/abs/2004.10934 + +Paper Scaled YOLO v4: https://arxiv.org/abs/2011.08036 use to reproduce results: [ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4) + +More details in articles on medium: + * [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) + * [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) + +Manual: https://github.com/AlexeyAB/darknet/wiki + +Discussion: + - [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) + - [Google-groups](https://groups.google.com/forum/#!forum/darknet) + - [Discord](https://discord.gg/zSq8rtW) + +About Darknet framework: http://pjreddie.com/darknet/ + +[![Darknet Continuous Integration](https://github.com/AlexeyAB/darknet/workflows/Darknet%20Continuous%20Integration/badge.svg)](https://github.com/AlexeyAB/darknet/actions?query=workflow%3A%22Darknet+Continuous+Integration%22) +[![CircleCI](https://circleci.com/gh/AlexeyAB/darknet.svg?style=svg)](https://circleci.com/gh/AlexeyAB/darknet) +[![TravisCI](https://travis-ci.org/AlexeyAB/darknet.svg?branch=master)](https://travis-ci.org/AlexeyAB/darknet) +[![Contributors](https://img.shields.io/github/contributors/AlexeyAB/Darknet.svg)](https://github.com/AlexeyAB/darknet/graphs/contributors) +[![License: 
Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE) +[![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965) +[![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2004.10934-B31B1B.svg)](https://arxiv.org/abs/2004.10934) +[![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) +[![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) + + +* [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) +* [Requirements (and how to install dependecies)](#requirements) +* [Pre-trained models](#pre-trained-models) +* [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) +* [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) +* [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) +* [Datasets](#datasets) + +0. [Improvements in this repository](#improvements-in-this-repository) +1. [How to use](#how-to-use-on-the-command-line) +2. How to compile on Linux + * [Using cmake](#how-to-compile-on-linux-using-cmake) + * [Using make](#how-to-compile-on-linux-using-make) +3. How to compile on Windows + * [Using cmake](#how-to-compile-on-windows-using-cmake) + * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg) + * [Legacy way](#how-to-compile-on-windows-legacy-way) +4. [Training and Evaluation of speed and accuracy on MS COCO](https://github.com/AlexeyAB/darknet/wiki#training-and-evaluation-of-speed-and-accuracy-on-ms-coco) +5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu) +6. [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) +7. [How to train tiny-yolo (to detect your custom objects)](#how-to-train-tiny-yolo-to-detect-your-custom-objects) +8. [When should I stop training](#when-should-i-stop-training) +9. [How to improve object detection](#how-to-improve-object-detection) +10. [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) +11. [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) + +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/101356322-f1f5a180-38a8-11eb-9907-4fe4f188d887.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 + + +tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times for batch=4. 
+* tkDNN: https://github.com/ceccocats/tkDNN +* OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf + +#### GeForce RTX 2080 Ti: +| Network Size | Darknet, FPS (avg)| tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | +|:-----:|:--------:|--------:|--------:|--------:|--------:|--------:|------:| +|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | +|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | +|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | +|608 | 53 | 62 | 103 | **115**| 150 | **150** | **2.8x** | +|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | +|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | + +* Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) +* Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) +* CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks +* Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) +* Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf +* Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg +* Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg + +#### Youtube video of results + +| [![Yolo v4](https://user-images.githubusercontent.com/4096485/101360000-1a33cf00-38ae-11eb-9e5e-b29c5fb0afbe.png)](https://youtu.be/1_SiUOYUoOI "Yolo v4") | [![Scaled Yolo v4](https://user-images.githubusercontent.com/4096485/101359389-43a02b00-38ad-11eb-866c-f813e96bf61a.png)](https://youtu.be/YDFf-TqJOFE "Scaled Yolo v4") | +|---|---| + +Others: https://www.youtube.com/user/pjreddie/videos + +#### How to evaluate AP of YOLOv4 on the MS COCO evaluation server + +1. Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip +2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt +3. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) +4. Content of the file `cfg/coco.data` should be + +```ini +classes= 80 +train = /trainvalno5k.txt +valid = /testdev2017.txt +names = data/coco.names +backup = backup +eval=coco +``` + +5. Create `/results/` folder near with `./darknet` executable file +6. Run validation: `./darknet detector valid cfg/coco.data cfg/yolov4.cfg yolov4.weights` +7. Rename the file `/results/coco_results.json` to `detections_test-dev2017_yolov4_results.json` and compress it to `detections_test-dev2017_yolov4_results.zip` +8. Submit file `detections_test-dev2017_yolov4_results.zip` to the MS COCO evaluation server for the `test-dev2019 (bbox)` + +#### How to evaluate FPS of YOLOv4 on GPU + +1. 
Compile Darknet with `GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=1` in the `Makefile` +2. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) +3. Get any .avi/.mp4 video file (preferably not more than 1920x1080 to avoid bottlenecks in CPU performance) +4. Run one of two commands and look at the AVG FPS: + +* include video_capturing + NMS + drawing_bboxes: + `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -dont_show -ext_output` +* exclude video_capturing + NMS + drawing_bboxes: + `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -benchmark` + +#### Pre-trained models + +There are weights-file for different cfg-files (trained for MS COCO dataset): + +FPS on RTX 2070 (R) and Tesla V100 (V): + +* [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) + * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 + +* [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) + + just change `width=` and `height=` parameters in `yolov4-csp.cfg` file and use the same `yolov4-csp.weights` file for all cases: + * `width=608 height=608` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops + * `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops + * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 + +* [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) + just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: + * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops + * `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops + * `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops + * `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops + +* [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) + +* [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) 
FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) + +* [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights) + +
CLICK ME - Yolo v3 models + +* [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) + +* [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) + +* [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) + +* [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) + +* [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) + +* [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing) + +
+ +
CLICK ME - Yolo v2 models + +* `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights +* `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights +* `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights +* `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights +* `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights + +
+ +Put it near compiled: darknet.exe + +You can get cfg-files by path: `darknet/cfg/` + +### Requirements + +* Windows or Linux +* **CMake >= 3.12**: https://cmake.org/download/ +* **CUDA >= 10.0**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) +* **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) +* **cuDNN >= 7.0** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) +* **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported +* on Linux **GCC or Clang**, on Windows **MSVC 2017/2019** https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community + +#### Yolo v4 in other frameworks + +* **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 +* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite + For YOLOv3 - convert `yolov3.weights`/`cfg` files to `yolov3.ckpt`/`pb/meta`: by using [mystic123](https://github.com/mystic123/tensorflow-yolo-v3) project, and [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) +* **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) +* **Intel OpenVINO 2020 R4:** (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) +* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn +* **PyTorch > ONNX**: + * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) + * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) + * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) + * [YOLOv5](https://github.com/ultralytics/yolov5) +* **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ +* **TensorRT** YOLOv4 on TensorRT+tkDNN: https://github.com/ceccocats/tkDNN + For YOLOv3 (-70% faster inference): [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) read 
[PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) implemented yolov3-spp, yolov4, etc. +* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream +* **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) +* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about +* **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite +* **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron + +#### Datasets + +* MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset +* OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset +* Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets +* ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set) +* German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use this parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task +* List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets + +### Improvements in this repository + +* developed State-of-the-Art object detector YOLOv4 +* added State-of-Art models: CSP, PRN, EfficientNet +* added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] +* added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video +* added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX +* added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) +* improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg +* improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm +* improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` +* improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... 
+* improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta +* improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) +* optimized memory allocation during network resizing when `random=1` +* optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 +* added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... +* added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training +* run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser +* added calculation of anchors for training +* added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +* run-time tips and warnings if you use incorrect cfg-file or dataset +* added support for Windows +* many other fixes of code... + +And added manual - [How to train Yolo v4-v2 (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) + +Also, you might be interested in using a simplified repository where is implemented INT8-quantization (+30% speedup and -1% mAP reduced): https://github.com/AlexeyAB/yolo2_light + +#### How to use on the command line + +On Linux use `./darknet` instead of `darknet.exe`, like this:`./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` + +On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` + +* Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` +* **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` +* Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` +* Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` +* Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` +* Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` +* Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` +* **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` +* Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` +* Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` +* Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): + `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` +* 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data 
cfg/yolo9000.cfg yolo9000.weights` +* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app +* To process a list of images `data/train.txt` and save results of detection to `result.json` file use: + `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` +* To process a list of images `data/train.txt` and save results of detection to `result.txt` use: + `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` +* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: + `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` +* To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` +* To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` +* To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights -iou_thresh 0.75` + +##### For using network video-camera mjpeg-stream with any Android smartphone + +1. Download for Android phone mjpeg-stream soft: IP Webcam / Smart WebCam + + * Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2 + * IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam + +2. Connect your Android phone to computer by WiFi (through a WiFi-router) or USB +3. Start Smart WebCam on your phone +4. Replace the address below, on shown in the phone application (Smart WebCam) and launch: + +* Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0` + +### How to compile on Linux/macOS (using `CMake`) + +The `CMakeLists.txt` will attempt to find installed optional dependencies like CUDA, cudnn, ZED and build against those. It will also create a shared object library file to use `darknet` for code development. + +Open a shell terminal inside the cloned repository and launch: + +```bash +./build.sh +``` + +### How to compile on Linux (using `make`) + +Just do `make` in the darknet directory. 
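+For example, one possible build invocation looks like the sketch below (a sketch only; `make` command-line variables override the defaults at the top of the `Makefile`, or you can edit the file directly - the options themselves are explained in the list that follows):
+
+```bash
+# assumption: run from the darknet repository root; enable only the options you need
+make clean
+make -j"$(nproc)" GPU=1 CUDNN=1 OPENCV=1 LIBSO=1
+```
+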
(You can try to compile and run it on Google Colab in cloud [link](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) (press «Open in Playground» button at the top-left corner) and watch the video [link](https://www.youtube.com/watch?v=mKAEGSxwOAY) ) +Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1) + +* `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`) +* `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) +* `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x +* `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams +* `DEBUG=1` to bould debug version of Yolo +* `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU +* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp + or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` +* `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run + `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` +* You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a never version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card. + +To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` + +### How to compile on Windows (using `CMake`) + +This is the recommended approach to build Darknet on Windows. + +1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com) + +2. Install CUDA (at least v10.0) enabling VS Integration during installation. + +3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: + +```PowerShell +PS Code\> git clone https://github.com/microsoft/vcpkg +PS Code\> cd vcpkg +PS Code\vcpkg> $env:VCPKG_ROOT=$PWD +PS Code\vcpkg> .\bootstrap-vcpkg.bat +PS Code\vcpkg> .\vcpkg install darknet[full]:x64-windows #replace with darknet[opencv-base,cuda,cudnn]:x64-windows for a quicker install of dependencies +PS Code\vcpkg> cd .. +PS Code\> git clone https://github.com/AlexeyAB/darknet +PS Code\> cd darknet +PS Code\darknet> .\build.ps1 +``` + +## How to train with multi-GPU + +1. Train it first on 1 GPU for like 1000 iterations: `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg yolov4.conv.137` + +2. 
Then stop and by using partially-trained model `/backup/yolov4_1000.weights` run training with multigpu (up to 4 GPUs): `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg /backup/yolov4_1000.weights -gpus 0,1,2,3` + +If you get a Nan, then for some datasets better to decrease learning rate, for 4 GPUs set `learning_rate = 0,00065` (i.e. learning_rate = 0.00261 / GPUs). In this case also increase 4x times `burn_in =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. + +https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ + +## How to train (to detect your custom objects) + +(to train old Yolo v2 `yolov2-voc.cfg`, `yolov2-tiny-voc.cfg`, `yolo-voc.cfg`, `yolo-voc.2.0.cfg`, ... [click by the link](https://github.com/AlexeyAB/darknet/tree/47c7af1cea5bbdedf1184963355e6418cb8b1b4f#how-to-train-pascal-voc-data)) + +Training Yolo v4 (and v3): + +0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) + +1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg)` and: + +* change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3) +* change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) +* change line max_batches to (`classes*2000` but not less than number of training images, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes +* change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22) +* set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9 +* change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers: + * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610 + * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696 + * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783 +* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. 
+  * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603
+  * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689
+  * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776
+* when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer
+  * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604
+  * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696
+  * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789
+
+So if `classes=1` then it should be `filters=18`. If `classes=2` then write `filters=21`.
+
+**(Do not write in the cfg-file: filters=(classes + 5)x3)**
+
+(Generally `filters` depends on the `classes`, `coords` and number of `mask`s, i.e. filters=`(classes + coords + 1)*<number of mask>`, where `mask` is indices of anchors. If `mask` is absent, then filters=`(classes + coords + 1)*num`)
+
+So for example, for 2 objects, your file `yolo-obj.cfg` should differ from `yolov4-custom.cfg` in such lines in each of **3** [yolo]-layers:
+
+```ini
+[convolutional]
+filters=21
+
+[region]
+classes=2
+```
+
+2. Create file `obj.names` in the directory `build\darknet\x64\data\`, with object names - each in a new line
+
+3. Create file `obj.data` in the directory `build\darknet\x64\data\`, containing (where **classes = number of objects**):
+
+  ```ini
+  classes = 2
+  train = data/train.txt
+  valid = data/test.txt
+  names = data/obj.names
+  backup = backup/
+  ```
+
+4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\`
+
+5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark
+
+It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line:
+
+`<object-class> <x_center> <y_center> <width> <height>`
+
+  Where:
+  * `<object-class>` - integer object number from `0` to `(classes-1)`
+  * `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]`
+  * for example: `<x_center> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>`
+  * attention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner)
+
+  For example, for `img1.jpg` a file `img1.txt` will be created, containing:
+
+  ```
+  1 0.716797 0.395833 0.216406 0.147222
+  0 0.687109 0.379167 0.255469 0.158333
+  1 0.420312 0.395833 0.140625 0.166667
+  ```
+
+6. Create file `train.txt` in directory `build\darknet\x64\data\`, with filenames of your images, each filename in new line, with path relative to `darknet.exe`, for example containing:
+
+  ```
+  data/obj/img1.jpg
+  data/obj/img2.jpg
+  data/obj/img3.jpg
+  ```
+
+7. 
Download pre-trained weights for the convolutional layers and put to the directory `build\darknet\x64` + * for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) + * for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29) + * for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing) + * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74) + * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing) + * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing) + + +8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137` + + To train on Linux use command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`) + + * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations) + * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations) + * (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2) + * (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser) + +8.1. For training with mAP (mean average precisions) calculation for each 4 Epochs (set `valid=valid.txt` or `train.txt` in `obj.data` file) and run: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` + +9. After training is complete - get result `yolo-obj_final.weights` from path `build\darknet\x64\backup\` + + * After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights` + + (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`) + + * Also you can get result earlier than all 45000 iterations. + + **Note:** If during training you see `nan` values for `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well. + + **Note:** If you changed width= or height= in your cfg-file, then new width and height must be divisible by 32. 
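+   For a quick sanity check of the cfg arithmetic from step 1 and the notes above, here is a small sketch (the rules follow this README; `classes=2` is just an example value):
+
+   ```bash
+   classes=2
+   filters=$(( (classes + 5) * 3 ))      # filters= in the [convolutional] before each [yolo] layer
+   max_batches=$(( classes * 2000 )); [ "$max_batches" -lt 6000 ] && max_batches=6000
+   steps="$(( max_batches * 80 / 100 )),$(( max_batches * 90 / 100 ))"
+   width=416; height=416                 # network size must stay a multiple of 32
+   echo "filters=$filters max_batches=$max_batches steps=$steps width=$width height=$height"
+   ```
+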
+ + **Note:** After training use such command for detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` + + **Note:** if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) + +### How to train tiny-yolo (to detect your custom objects): + +Do all the same steps as for the full yolo model as described above. With the exception of: +* Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 + (Or get this file from yolov4-tiny.weights file by using command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29` +* Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg` +* Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29` + +For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as showed in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd +If you made you custom model that isn't based on other models, then you can train it without pre-trained weights, then will be used random initial weights. + +## When should I stop training: + +Usually sufficient 2000 iterations for each class(object), but not less than number of training images and not less than 6000 iterations in total. But for a more precise definition when you should stop training, use the following manual: + +1. During training, you will see varying indicators of error, and you should stop when no longer decreases **0.XXXXXXX avg**: + + > Region Avg IOU: 0.798363, Class: 0.893232, Obj: 0.700808, No Obj: 0.004567, Avg Recall: 1.000000, count: 8 + > Region Avg IOU: 0.800677, Class: 0.892181, Obj: 0.701590, No Obj: 0.004574, Avg Recall: 1.000000, count: 8 + > + > **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images + > Loaded: 0.000000 seconds + + * **9002** - iteration number (number of batch) + * **0.60730 avg** - average loss (error) - **the lower, the better** + + When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). + + Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. + +2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: + +For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. 
You should get weights from **Early Stopping Point**: + +![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) + +To get weights from Early Stopping Point: + + 2.1. At first, in your file `obj.data` you must specify the path to the validation dataset `valid = valid.txt` (format of `valid.txt` as in `train.txt`), and if you haven't validation images, just copy `data\train.txt` to `data\valid.txt`. + + 2.2 If training is stopped after 9000 iterations, to validate some of previous weights use this commands: + +(If you use another GitHub repository, then use `darknet.exe detector recall`... instead of `darknet.exe detector map`...) + +* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` +* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` +* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` + +And comapre last output lines for each weights (7000, 8000, 9000): + +Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) + +For example, **bigger mAP** gives weights `yolo-obj_8000.weights` - then **use this weights for detection**. + +Or just train with `-map` flag: + +`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` + +So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calculated for each 4 Epochs using `valid=valid.txt` file that is specified in `obj.data` file (`1 Epoch = images_in_train_txt / batch` iterations) + +(to change the max x-axis value - change [`max_batches=`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) parameter to `2000*classes`, f.e. `max_batches=6000` for 3 classes) + +![loss_chart_map_chart](https://hsto.org/webt/yd/vl/ag/ydvlagutof2zcnjodstgroen8ac.jpeg) + +Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` + +* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 + +* **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf + +**mAP** is default metric of precision in the PascalVOC competition, **this is the same as AP50** metric in the MS COCO competition. +In terms of Wiki, indicators Precision and Recall have a slightly different meaning than in the PascalVOC competition, but **IoU always has the same meaning**. + +![precision_recall_iou](https://hsto.org/files/ca8/866/d76/ca8866d76fb840228940dbf442a7f06a.jpg) + + +### Custom object detection: + +Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` + +| ![Yolo_v2_training](https://hsto.org/files/d12/1e7/515/d121e7515f6a4eb694913f10de5f2b61.jpg) | ![Yolo_v2_training](https://hsto.org/files/727/c7e/5e9/727c7e5e99bf4d4aa34027bb6a5e4bab.jpg) | +|---|---| + +## How to improve object detection: + +1. 
Before training: + +* set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788) + +* increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision + +* check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark + +* my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. + +* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more + +* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects + +* What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected. + +* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0,0615234375*(width*height)` where are width and height are parameters from `[net]` section in cfg-file) + +* for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895 + * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892 + * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989 + +* for training for both small and large objects use modified models: + * Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg + * Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg + * YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg + +* If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) 
* General rule - your training dataset should include such a set of relative sizes of objects that you want to detect: + * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` + * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` + + I.e. for each object from the Test dataset there must be at least 1 object in the Training dataset with the same class_id and about the same relative size: + + `object width in percent from Training dataset` ~= `object width in percent from Test dataset` + + That is, if only objects that occupied 80-90% of the image were present in the training set, then the trained network will not be able to detect objects that occupy 1-10% of the image. + +* to speed up training (at the cost of detection accuracy) set the parameter `stopbackward=1` for layer-136 in the cfg-file + +* each: `model of object, side, illumination, scale, each 30 degrees` of turn and inclination angle - these are *different objects* from the internal perspective of the neural network. So the more *different objects* you want to detect, the more complex the network model you should use. + +* to make the detected bounded boxes more accurate, you can add the 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train; this will increase mAP@0.9, but decrease mAP@0.5. + +* Only if you are an **expert** in neural detection networks - recalculate the anchors for your dataset for the `width` and `height` from the cfg-file: +`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` +then set the same 9 `anchors` in each of the 3 `[yolo]`-layers in your cfg-file. But you should change the indexes of anchors `masks=` for each [yolo]-layer, so that for YOLOv4 the 1st [yolo]-layer has anchors smaller than 30x30, the 2nd smaller than 60x60, and the 3rd the remaining ones (and vice versa for YOLOv3). Also you should change the `filters=(classes + 5)*` before each [yolo]-layer (see the sketch below). If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors.
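A minimal stand-alone sketch of that mask/filters bookkeeping (the anchor values and `classes=3` below are hypothetical, and the 30x30/60x60 split follows the YOLOv4 rule stated above; with the usual 3 masks per [yolo]-layer this reproduces the familiar `filters=(classes + 5)*3` value):

```cpp
#include <cstdio>

int main() {
    // Hypothetical output of `darknet.exe detector calc_anchors` for some
    // custom dataset: 9 (w,h) anchor pairs sorted from smallest to largest.
    int anchors[9][2] = { {8,11},  {14,20},   {22,26},
                          {35,48}, {44,58},   {52,41},
                          {90,120},{150,180}, {300,260} };
    int classes = 3;                  // example custom dataset with 3 classes
    int masks_per_layer[3] = {0, 0, 0};

    // YOLOv4 ordering: the 1st [yolo]-layer gets anchors smaller than 30x30,
    // the 2nd those smaller than 60x60, the 3rd the rest (reverse for YOLOv3).
    for (int i = 0; i < 9; ++i) {
        int w = anchors[i][0], h = anchors[i][1];
        int layer = (w < 30 && h < 30) ? 0 : (w < 60 && h < 60) ? 1 : 2;
        ++masks_per_layer[layer];
        printf("anchor %d (%dx%d) -> masks= of [yolo]-layer %d\n", i, w, h, layer + 1);
    }

    // filters= in the [convolutional] layer immediately before each [yolo]-layer
    for (int l = 0; l < 3; ++l)
        printf("[yolo]-layer %d: %d masks -> filters=(classes + 5)*%d = %d\n",
               l + 1, masks_per_layer[l], masks_per_layer[l],
               (classes + 5) * masks_per_layer[l]);
    return 0;
}
```

With the even 3/3/3 split produced by these example anchors, each [yolo]-layer keeps 3 masks, so `filters=(3 + 5)*3 = 24` for 3 classes.

2. 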
After training - for detection: + +* Increase the network resolution by setting in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9) + +* it is not necessary to train the network again - just use a `.weights`-file already trained for 416x416 resolution + +* to get even greater accuracy you should train with a higher resolution, 608x608 or 832x832; note: if the error `Out of memory` occurs, then in the `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) + +## How to mark bounded boxes of objects and create annotation files: + +Here you can find a repository with GUI software for marking bounded boxes of objects and generating annotation files for Yolo v2 - v4: https://github.com/AlexeyAB/Yolo_mark + +With examples of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air1-6.txt`, `bird1-4.txt` for 2 classes of objects (air, bird) and `train_obj.cmd` with an example of how to train this image-set with Yolo v2 - v4 + +Different tools for marking objects in images: + +1. in C++: https://github.com/AlexeyAB/Yolo_mark +2. in Python: https://github.com/tzutalin/labelImg +3. in Python: https://github.com/Cartucho/OpenLabeling +4. in C++: https://www.ccoderun.ca/darkmark/ +5. in JavaScript: https://github.com/opencv/cvat +6. in C++: https://github.com/jveitchmichaelis/deeplabel +7. in C#: https://github.com/BMW-InnovationLab/BMW-Labeltool-Lite +8. DL-Annotator for Windows ($30): [url](https://www.microsoft.com/en-us/p/dlannotator/9nsx79m7t8fn?activetab=pivot:overviewtab) +9. v7labs - cloud labeling tool ($1.5 per hour): https://www.v7labs.com/ + +## How to use Yolo as DLL and SO libraries + +* on Linux + * using `build.sh` or + * build `darknet` using `cmake` or + * set `LIBSO=1` in the `Makefile` and do `make` +* on Windows + * using `build.ps1` or + * build `darknet` using `cmake` or + * compile the `build\darknet\yolo_cpp_dll.sln` solution or the `build\darknet\yolo_cpp_dll_no_gpu.sln` solution + +There are 2 APIs: + +* C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h + * Python examples using the C API: + * https://github.com/AlexeyAB/darknet/blob/master/darknet.py + * https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py + +* C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp + * C++ example that uses the C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
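As a quick orientation before the build steps, here is a minimal sketch of calling the C++ API (it uses only the `Detector` class from `include/yolo_v2_class.hpp`, whose declaration is quoted further below; the cfg/weights/image file names are placeholders):

```cpp
#include <iostream>
#include <vector>
#include "yolo_v2_class.hpp"   // C++ API of the darknet DLL/SO (adjust the include path to your build)

int main() {
    // cfg/weights/image paths are placeholders - replace with your own files
    Detector detector("yolo-obj.cfg", "yolo-obj_8000.weights" /*, gpu_id = 0 */);

    std::vector<bbox_t> result = detector.detect("test.jpg", 0.2f /* thresh */);

    for (const bbox_t& b : result) {
        std::cout << "obj_id = " << b.obj_id << "  prob = " << b.prob
                  << "  box = " << b.x << "," << b.y
                  << " " << b.w << "x" << b.h << "\n";
    }
    return 0;
}
```

 + +---- + +1. To compile Yolo as a C++ DLL-file `yolo_cpp_dll.dll` - open the solution `build\darknet\yolo_cpp_dll.sln`, set **x64** and **Release**, and do: Build -> Build yolo_cpp_dll + * You should have installed **CUDA 10.0** + * To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;` + +2. 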
To use Yolo as a DLL-file in your C++ console application - open the solution `build\darknet\yolo_console_dll.sln`, set **x64** and **Release**, and do: Build -> Build yolo_console_dll + + * you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe` + **use this command**: `yolo_console_dll.exe data/coco.names yolov4.cfg yolov4.weights test.mp4` + + * after launching your console application and entering the image file name - you will see info for each detected object + * to use the simple OpenCV-GUI you should uncomment the line `//#define OPENCV` in the `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5) + * you can see the source code of a simple example of detection on a video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75) + +`yolo_cpp_dll.dll`-API: [link](https://github.com/AlexeyAB/darknet/blob/master/src/yolo_v2_class.hpp#L42) +
```cpp
struct bbox_t {
    unsigned int x, y, w, h;       // (x,y) - top-left corner, (w, h) - width & height of bounded box
    float prob;                    // confidence - probability that the object was found correctly
    unsigned int obj_id;           // class of object - from range [0, classes-1]
    unsigned int track_id;         // tracking id for video (0 - untracked, 1 - inf - tracked object)
    unsigned int frames_counter;   // counter of frames on which the object was detected
};

class Detector {
public:
    Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
    ~Detector();

    std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
    std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
    static image_t load_image(std::string image_filename);
    static void free_image(image_t m);

#ifdef OPENCV
    std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false);
    std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const;
#endif
};
```
diff --git a/build/darknet/x64/cfg/cspx-p7-mish.cfg b/build/darknet/x64/cfg/cspx-p7-mish.cfg index b5713391bcc..01be7283680 100644 --- a/build/darknet/x64/cfg/cspx-p7-mish.cfg +++ b/build/darknet/x64/cfg/cspx-p7-mish.cfg @@ -2340,7 +2340,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] @@ -2352,7 +2352,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2400,7 +2400,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 4,5,6,7 @@ -2411,7 +2411,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2459,7 +2459,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 8,9,10,11 @@ -2470,7 +2470,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2518,7 +2518,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 12,13,14,15 @@ -2529,7 +2529,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2577,7 +2577,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 16,17,18,19 @@ -2588,7 +2588,7 @@ ignore_thresh = .7 truth_thresh 
= 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 diff --git a/build/darknet/x64/cfg/yolov4-csp.cfg b/build/darknet/x64/cfg/yolov4-csp.cfg new file mode 100644 index 00000000000..23ebfd7461d --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-csp.cfg @@ -0,0 +1,1277 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#optimized_memory=1 + +#23:104x104 54:52x52 85:26x26 104:13x13 for 416 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -2 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -1,-7 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish 
+ +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=0 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=4.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -20 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -49 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/build/darknet/x64/cfg/yolov4-tiny.cfg b/build/darknet/x64/cfg/yolov4-tiny.cfg index dc6f5bfb8c6..821a34bff72 100644 --- a/build/darknet/x64/cfg/yolov4-tiny.cfg +++ b/build/darknet/x64/cfg/yolov4-tiny.cfg @@ -17,11 +17,24 @@ hue=.1 learning_rate=0.00261 burn_in=1000 + max_batches = 500200 policy=steps steps=400000,450000 scales=.1,.1 +#max_batches = 501000 +#policy=steps +#steps=500000 +#scales=0.01 + +#weights_reject_freq=1001 +#ema_alpha=0.998 +#equidistant_point=1000 +#num_sigmas_reject_badlabels=3 +#badlabels_rejection_percentage=0.1 + + [convolutional] batch_normalize=1 filters=32 @@ -230,6 +243,8 @@ random=0 resize=1.5 nms_kind=greedynms beta_nms=0.6 +#new_coords=1 +#scale_x_y = 2.0 [route] layers = -4 @@ -279,3 +294,5 @@ random=0 resize=1.5 nms_kind=greedynms beta_nms=0.6 +#new_coords=1 +#scale_x_y = 2.0 diff --git a/build/darknet/x64/cfg/yolov4x-mish.cfg b/build/darknet/x64/cfg/yolov4x-mish.cfg index 0e9fb44132d..2ff854f6dcc 100644 --- a/build/darknet/x64/cfg/yolov4x-mish.cfg +++ b/build/darknet/x64/cfg/yolov4x-mish.cfg @@ -26,7 +26,7 @@ mosaic=1 letter_box=1 -optimized_memory=1 +#optimized_memory=1 [convolutional] batch_normalize=1 @@ -1150,7 +1150,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1159,6 +1159,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=0 ignore_thresh = .7 truth_thresh = 1 @@ -1172,7 +1173,7 @@ iou_loss=ciou nms_kind=diounms beta_nms=0.6 new_coords=1 -max_delta=20 +max_delta=5 [route] layers = -4 @@ -1279,7 +1280,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1288,6 +1289,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 @@ -1408,7 +1410,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1417,6 +1419,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 diff --git a/cfg/cspx-p7-mish.cfg b/cfg/cspx-p7-mish.cfg index b5713391bcc..01be7283680 100644 --- a/cfg/cspx-p7-mish.cfg +++ b/cfg/cspx-p7-mish.cfg @@ -2340,7 +2340,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] @@ -2352,7 +2352,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2400,7 +2400,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear 
+activation=logistic [yolo] mask = 4,5,6,7 @@ -2411,7 +2411,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2459,7 +2459,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 8,9,10,11 @@ -2470,7 +2470,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2518,7 +2518,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 12,13,14,15 @@ -2529,7 +2529,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2577,7 +2577,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 16,17,18,19 @@ -2588,7 +2588,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 diff --git a/cfg/yolov4-csp.cfg b/cfg/yolov4-csp.cfg new file mode 100644 index 00000000000..23ebfd7461d --- /dev/null +++ b/cfg/yolov4-csp.cfg @@ -0,0 +1,1277 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#optimized_memory=1 + +#23:104x104 54:52x52 85:26x26 104:13x13 for 416 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -2 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -1,-7 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=0 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=4.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -20 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 
76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -49 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/cfg/yolov4x-mish.cfg b/cfg/yolov4x-mish.cfg index 0e9fb44132d..2ff854f6dcc 100644 --- a/cfg/yolov4x-mish.cfg +++ b/cfg/yolov4x-mish.cfg @@ -26,7 +26,7 @@ mosaic=1 letter_box=1 -optimized_memory=1 +#optimized_memory=1 [convolutional] batch_normalize=1 @@ -1150,7 +1150,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1159,6 +1159,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=0 ignore_thresh = .7 truth_thresh = 1 @@ -1172,7 +1173,7 @@ iou_loss=ciou nms_kind=diounms beta_nms=0.6 new_coords=1 -max_delta=20 +max_delta=5 [route] layers = -4 @@ -1279,7 +1280,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1288,6 +1289,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 @@ -1408,7 +1410,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1417,6 +1419,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 diff --git a/include/darknet.h b/include/darknet.h index 3f90f0ad875..040d9e35999 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -414,6 +414,10 @@ struct layer { float *scales; float *scale_updates; + float *weights_ema; + float *biases_ema; + float *scales_ema; + float *weights; float *weight_updates; @@ -697,8 +701,15 @@ typedef struct network { int n; int batch; uint64_t *seen; + float *badlabels_reject_threshold; + float *delta_rolling_max; float *delta_rolling_avg; + float 
*delta_rolling_std; + int weights_reject_freq; int equidistant_point; + float badlabels_rejection_percentage; + float num_sigmas_reject_badlabels; + float ema_alpha; int *cur_iteration; float loss_scale; int *t; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 130f57fabb0..1d52dd1d23c 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -566,6 +566,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (train) { l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float)); l.bias_updates = (float*)xcalloc(n, sizeof(float)); + + l.weights_ema = (float*)xcalloc(l.nweights, sizeof(float)); + l.biases_ema = (float*)xcalloc(n, sizeof(float)); } } @@ -637,6 +640,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.scales[i] = 1; } if (train) { + l.scales_ema = (float*)xcalloc(n, sizeof(float)); l.scale_updates = (float*)xcalloc(n, sizeof(float)); l.mean = (float*)xcalloc(n, sizeof(float)); diff --git a/src/darknet.c b/src/darknet.c index 92a9c193035..13ab75f3d38 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -178,7 +178,7 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max) } *net.seen = 0; *net.cur_iteration = 0; - save_weights_upto(net, outfile, max); + save_weights_upto(net, outfile, max, 0); } #include "convolutional_layer.h" diff --git a/src/demo.c b/src/demo.c index c7bf4a5debe..dfbc3c5e796 100644 --- a/src/demo.c +++ b/src/demo.c @@ -171,6 +171,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(weightfile){ load_weights(&net, weightfile); } + if (net.letter_box) letter_box = 1; net.benchmark_layers = benchmark_layers; fuse_conv_batchnorm(net); calculate_binary_weights(net); diff --git a/src/detector.c b/src/detector.c index 88a04acecb7..5c84bfcb542 100644 --- a/src/detector.c +++ b/src/detector.c @@ -389,7 +389,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { //if (i % 100 == 0) { - if (iteration >= (iter_save + 1000) || iteration % 1000 == 0) { + if ((iteration >= (iter_save + 10000) || iteration % 10000 == 0) || + (iteration >= (iter_save + 1000) || iteration % 1000 == 0) && net.max_batches < 10000) + { iter_save = iteration; #ifdef GPU if (ngpus != 1) sync_nets(nets, ngpus, 0); @@ -407,6 +409,12 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i char buff[256]; sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); + + if (net.ema_alpha && is_ema_initialized(net)) { + sprintf(buff, "%s/%s_ema.weights", backup_directory, base); + save_weights_upto(net, buff, net.n, 1); + printf(" EMA weights are saved to the file: %s \n", buff); + } } free_data(train); } @@ -1007,6 +1015,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa args.w = net.w; args.h = net.h; args.c = net.c; + letter_box = net.letter_box; if (letter_box) args.type = LETTERBOX_DATA; else args.type = IMAGE_DATA; @@ -1611,6 +1620,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam if (weightfile) { load_weights(&net, weightfile); } + if (net.letter_box) letter_box = 1; net.benchmark_layers = benchmark_layers; fuse_conv_batchnorm(net); calculate_binary_weights(net); diff --git a/src/layer.c b/src/layer.c index fe3827d2ad5..032a24e0f2c 100644 --- a/src/layer.c +++ b/src/layer.c @@ -92,6 +92,9 @@ void free_layer_custom(layer l, int 
keep_cudnn_desc) if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; if (l.scales) free(l.scales), l.scales = NULL; if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; + if (l.biases_ema) free(l.biases_ema), l.biases = NULL; + if (l.scales_ema) free(l.scales_ema), l.scales = NULL; + if (l.weights_ema) free(l.weights_ema), l.weights = NULL; if (l.weights) free(l.weights), l.weights = NULL; if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; if (l.align_bit_weights) free(l.align_bit_weights); diff --git a/src/network.c b/src/network.c index a59f2e478b1..9dec1df5761 100644 --- a/src/network.c +++ b/src/network.c @@ -245,7 +245,10 @@ network make_network(int n) net.n = n; net.layers = (layer*)xcalloc(net.n, sizeof(layer)); net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t)); - net.delta_rolling_avg = (float*)xcalloc(1, sizeof(float)); + net.badlabels_reject_threshold = (float*)xcalloc(1, sizeof(float)); + net.delta_rolling_max = (float*)xcalloc(1, sizeof(float)); + net.delta_rolling_avg = (float*)xcalloc(1, sizeof(float)); + net.delta_rolling_std = (float*)xcalloc(1, sizeof(float)); net.cur_iteration = (int*)xcalloc(1, sizeof(int)); net.total_bbox = (int*)xcalloc(1, sizeof(int)); net.rewritten_bbox = (int*)xcalloc(1, sizeof(int)); @@ -422,6 +425,45 @@ float train_network_waitkey(network net, data d, int wait_key) #else // GPU update_network(net); #endif // GPU + + int ema_start_point = net.max_batches / 2; + + if (net.ema_alpha && (*net.cur_iteration) >= ema_start_point) + { + int ema_period = (net.max_batches - ema_start_point - 1000) * (1.0 - net.ema_alpha); + int ema_apply_point = net.max_batches - 1000; + + if (!is_ema_initialized(net)) + { + ema_update(net, 0); // init EMA + printf(" EMA initialization \n"); + } + + if ((*net.cur_iteration) == ema_apply_point) + { + ema_apply(net); // apply EMA (BN rolling mean/var recalculation is required) + printf(" ema_apply() \n"); + } + else + if ((*net.cur_iteration) < ema_apply_point)// && (*net.cur_iteration) % ema_period == 0) + { + ema_update(net, net.ema_alpha); // update EMA + printf(" ema_update(), ema_alpha = %f \n", net.ema_alpha); + } + } + + + int reject_stop_point = net.max_batches*3/4; + + if ((*net.cur_iteration) < reject_stop_point && + net.weights_reject_freq && + (*net.cur_iteration) % net.weights_reject_freq == 0) + { + float sim_threshold = 0.4; + reject_similar_weights(net, sim_threshold); + } + + free(X); free(y); return (float)sum/(n*batch); @@ -1182,7 +1224,10 @@ void free_network(network net) free(net.scales); free(net.steps); free(net.seen); - free(net.delta_rolling_avg); + free(net.badlabels_reject_threshold); + free(net.delta_rolling_max); + free(net.delta_rolling_avg); + free(net.delta_rolling_std); free(net.cur_iteration); free(net.total_bbox); free(net.rewritten_bbox); @@ -1483,3 +1528,138 @@ void restore_network_recurrent_state(network net) if (net.layers[k].type == CRNN) free_state_crnn(net.layers[k]); } } + + +int is_ema_initialized(network net) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (l.type == CONVOLUTIONAL) { + int k; + if (l.weights_ema) { + for (k = 0; k < l.nweights; ++k) { + if (l.weights_ema[k] != 0) return 1; + } + } + } + } + + return 0; +} + +void ema_update(network net, float ema_alpha) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (l.type == CONVOLUTIONAL) { +#ifdef GPU + if (gpu_index >= 0) { + pull_convolutional_layer(l); + } +#endif + int k; + if (l.weights_ema) { + for (k = 0; k < 
l.nweights; ++k) { + l.weights_ema[k] = ema_alpha * l.weights_ema[k] + (1 - ema_alpha) * l.weights[k]; + } + } + + for (k = 0; k < l.n; ++k) { + if (l.biases_ema) l.biases_ema[k] = ema_alpha * l.biases_ema[k] + (1 - ema_alpha) * l.biases[k]; + if (l.scales_ema) l.scales_ema[k] = ema_alpha * l.scales_ema[k] + (1 - ema_alpha) * l.scales[k]; + } + } + } +} + + +void ema_apply(network net) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (l.type == CONVOLUTIONAL) { + int k; + if (l.weights_ema) { + for (k = 0; k < l.nweights; ++k) { + l.weights[k] = l.weights_ema[k]; + } + } + + for (k = 0; k < l.n; ++k) { + if (l.biases_ema) l.biases[k] = l.biases_ema[k]; + if (l.scales_ema) l.scales[k] = l.scales_ema[k]; + } + +#ifdef GPU + if (gpu_index >= 0) { + push_convolutional_layer(l); + } +#endif + } + } +} + + + +void reject_similar_weights(network net, float sim_threshold) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (i == 0) continue; + if (net.n > i + 1) if (net.layers[i + 1].type == YOLO) continue; + if (net.n > i + 2) if (net.layers[i + 2].type == YOLO) continue; + if (net.n > i + 3) if (net.layers[i + 3].type == YOLO) continue; + + if (l.type == CONVOLUTIONAL && l.activation != LINEAR) { +#ifdef GPU + if (gpu_index >= 0) { + pull_convolutional_layer(l); + } +#endif + int k, j; + float max_sim = -1000; + int max_sim_index = 0; + int max_sim_index2 = 0; + int filter_size = l.size*l.size*l.c; + for (k = 0; k < l.n; ++k) + { + for (j = k+1; j < l.n; ++j) + { + int w1 = k; + int w2 = j; + + float sim = cosine_similarity(&l.weights[filter_size*w1], &l.weights[filter_size*w2], filter_size); + if (sim > max_sim) { + max_sim = sim; + max_sim_index = w1; + max_sim_index2 = w2; + } + } + } + + printf(" reject_similar_weights: i = %d, l.n = %d, w1 = %d, w2 = %d, sim = %f, thresh = %f \n", + i, l.n, max_sim_index, max_sim_index2, max_sim, sim_threshold); + + if (max_sim > sim_threshold) { + printf(" rejecting... \n"); + float scale = sqrt(2. 
/ (l.size*l.size*l.c / l.groups));
+
+            for (k = 0; k < filter_size; ++k) {
+                l.weights[max_sim_index*filter_size + k] = scale*rand_uniform(-1, 1);
+            }
+            if (l.biases) l.biases[max_sim_index] = 0.0f;
+            if (l.scales) l.scales[max_sim_index] = 1.0f;
+        }
+
+#ifdef GPU
+        if (gpu_index >= 0) {
+            push_convolutional_layer(l);
+        }
+#endif
+    }
+    }
+}

diff --git a/src/network.h b/src/network.h
index 42d68d6c3c3..7661c8ef806 100644
--- a/src/network.h
+++ b/src/network.h
@@ -171,6 +171,11 @@ void free_network_recurrent_state(network net);
 void randomize_network_recurrent_state(network net);
 void remember_network_recurrent_state(network net);
 void restore_network_recurrent_state(network net);
+int is_ema_initialized(network net);
+void ema_update(network net, float ema_alpha);
+void ema_apply(network net);
+void reject_similar_weights(network net, float sim_threshold);
+
 #ifdef __cplusplus
 }

diff --git a/src/parser.c b/src/parser.c
index 74d60c8e306..eb41e91457b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -454,7 +454,7 @@ layer parse_yolo(list *options, size_params params)
     }
     //assert(l.outputs == params.inputs);

-    l.show_details = option_find_int_quiet(options, "show_details", 0);
+    l.show_details = option_find_int_quiet(options, "show_details", 1);
     l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10
     char *cpc = option_find_str(options, "counters_per_class", 0);
     l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta);
@@ -1155,8 +1155,15 @@ void parse_net_options(list *options, network *net)
     net->batch *= net->time_steps; // mini_batch * time_steps
     net->subdivisions = subdivs; // number of mini_batches

+    net->weights_reject_freq = option_find_int_quiet(options, "weights_reject_freq", 0);
     net->equidistant_point = option_find_int_quiet(options, "equidistant_point", 0);
+    net->badlabels_rejection_percentage = option_find_float_quiet(options, "badlabels_rejection_percentage", 0);
+    net->num_sigmas_reject_badlabels = option_find_float_quiet(options, "num_sigmas_reject_badlabels", 0);
+    net->ema_alpha = option_find_float_quiet(options, "ema_alpha", 0);
+    *net->badlabels_reject_threshold = 0;
+    *net->delta_rolling_max = 0;
     *net->delta_rolling_avg = 0;
+    *net->delta_rolling_std = 0;
     *net->seen = 0;
     *net->cur_iteration = 0;
     net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
@@ -1819,6 +1826,31 @@ void save_convolutional_weights(layer l, FILE *fp)
     //}
 }

+void save_convolutional_weights_ema(layer l, FILE *fp)
+{
+    if (l.binary) {
+        //save_convolutional_weights_binary(l, fp);
+        //return;
+    }
+#ifdef GPU
+    if (gpu_index >= 0) {
+        pull_convolutional_layer(l);
+    }
+#endif
+    int num = l.nweights;
+    fwrite(l.biases_ema, sizeof(float), l.n, fp);
+    if (l.batch_normalize) {
+        fwrite(l.scales_ema, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    fwrite(l.weights_ema, sizeof(float), num, fp);
+    //if(l.adam){
+    //    fwrite(l.m, sizeof(float), num, fp);
+    //    fwrite(l.v, sizeof(float), num, fp);
+    //}
+}
+
 void save_batchnorm_weights(layer l, FILE *fp)
 {
 #ifdef GPU
@@ -1848,7 +1880,7 @@ void save_connected_weights(layer l, FILE *fp)
     }
 }

-void save_weights_upto(network net, char *filename, int cutoff)
+void save_weights_upto(network net, char *filename, int cutoff, int save_ema)
 {
 #ifdef GPU
     if(net.gpu_index >= 0){
@@ -1872,7 +1904,12 @@ void save_weights_upto(network net, char *filename, int cutoff)
     for(i = 0; i < net.n && i < cutoff; ++i){
         layer l = net.layers[i];
         if (l.type == CONVOLUTIONAL && l.share_layer == NULL) {
-            save_convolutional_weights(l, fp);
+            if (save_ema) {
+                save_convolutional_weights_ema(l, fp);
+            }
+            else {
+                save_convolutional_weights(l, fp);
+            }
         }
         if (l.type == SHORTCUT && l.nweights > 0) { save_shortcut_weights(l, fp); }
         if(l.type == CONNECTED){
@@ -1935,7 +1972,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
 }
 void save_weights(network net, char *filename)
 {
-    save_weights_upto(net, filename, net.n);
+    save_weights_upto(net, filename, net.n, 0);
 }

 void transpose_matrix(float *a, int rows, int cols)
diff --git a/src/parser.h b/src/parser.h
index 5e0d3fd1295..05241167d20 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -9,7 +9,7 @@ network parse_network_cfg(char *filename);
 network parse_network_cfg_custom(char *filename, int batch, int time_steps);
 void save_network(network net, char *filename);
 void save_weights(network net, char *filename);
-void save_weights_upto(network net, char *filename, int cutoff);
+void save_weights_upto(network net, char *filename, int cutoff, int save_ema);
 void save_weights_double(network net, char *filename);
 void load_weights(network *net, char *filename);
 void load_weights_upto(network *net, char *filename, int cutoff);
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index d756d12242a..cd06c0fb7ad 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -138,8 +138,8 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw
     // w = ln(t.w * net.w / anchors_w); // w - output of previous conv-layer
     // h = ln(t.h * net.h / anchors_h); // h - output of previous conv-layer
     if (new_coords) {
-        b.x = (i + x[index + 0 * stride] * 2 - 0.5) / lw;
-        b.y = (j + x[index + 1 * stride] * 2 - 0.5) / lh;
+        b.x = (i + x[index + 0 * stride]) / lw;
+        b.y = (j + x[index + 1 * stride]) / lh;
         b.w = x[index + 2 * stride] * x[index + 2 * stride] * 4 * biases[2 * n] / w;
         b.h = x[index + 3 * stride] * x[index + 3 * stride] * 4 * biases[2 * n + 1] / h;
     }
@@ -197,8 +197,8 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
     float th = log(truth.h*h / biases[2 * n + 1]);

     if (new_coords) {
-        tx = (truth.x*lw - i + 0.5) / 2;
-        ty = (truth.y*lh - j + 0.5) / 2;
+        //tx = (truth.x*lw - i + 0.5) / 2;
+        //ty = (truth.y*lh - j + 0.5) / 2;
         tw = sqrt(truth.w*w / (4 * biases[2 * n]));
         th = sqrt(truth.h*h / (4 * biases[2 * n + 1]));
     }
@@ -230,16 +230,28 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
     float dw = all_ious.dx_iou.dl;
     float dh = all_ious.dx_iou.dr;

+    // predict exponential, apply gradient of e^delta_t ONLY for w,h
     if (new_coords) {
-        dw *= 8 * x[index + 2 * stride];
-        dh *= 8 * x[index + 3 * stride];
+        //dw *= 8 * x[index + 2 * stride];
+        //dh *= 8 * x[index + 3 * stride];
+        //dw *= 8 * x[index + 2 * stride] * biases[2 * n] / w;
+        //dh *= 8 * x[index + 3 * stride] * biases[2 * n + 1] / h;
+
+        //float grad_w = 8 * exp(-x[index + 2 * stride]) / pow(exp(-x[index + 2 * stride]) + 1, 3);
+        //float grad_h = 8 * exp(-x[index + 3 * stride]) / pow(exp(-x[index + 3 * stride]) + 1, 3);
+        //dw *= grad_w;
+        //dh *= grad_h;
     }
     else {
         dw *= exp(x[index + 2 * stride]);
         dh *= exp(x[index + 3 * stride]);
     }
+
+    //dw *= exp(x[index + 2 * stride]);
+    //dh *= exp(x[index + 3 * stride]);
+
     // normalize iou weight
     dx *= iou_normalizer;
     dy *= iou_normalizer;
@@ -368,6 +380,8 @@ typedef struct train_yolo_args {
     int b;

     float tot_iou;
+    float tot_giou_loss;
+    float tot_iou_loss;
     int count;
     int class_count;
 } train_yolo_args;
@@ -388,8 +402,8 @@ void *process_batch(void* ptr)
     float tot_giou = 0;
     float tot_diou = 0;
     float tot_ciou = 0;
-    float tot_iou_loss = 0;
-    float tot_giou_loss = 0;
+    //float tot_iou_loss = 0;
+    //float tot_giou_loss = 0;
     float tot_diou_loss = 0;
     float tot_ciou_loss = 0;
     float recall = 0;
@@ -540,10 +554,10 @@ void *process_batch(void* ptr)
                         // range is 0 <= 1
                         args->tot_iou += all_ious.iou;
-                        tot_iou_loss += 1 - all_ious.iou;
+                        args->tot_iou_loss += 1 - all_ious.iou;
                         // range is -1 <= giou <= 1
                         tot_giou += all_ious.giou;
-                        tot_giou_loss += 1 - all_ious.giou;
+                        args->tot_giou_loss += 1 - all_ious.giou;

                         tot_diou += all_ious.diou;
                         tot_diou_loss += 1 - all_ious.diou;
@@ -592,10 +606,10 @@ void *process_batch(void* ptr)
                 // range is 0 <= 1
                 args->tot_iou += all_ious.iou;
-                tot_iou_loss += 1 - all_ious.iou;
+                args->tot_iou_loss += 1 - all_ious.iou;
                 // range is -1 <= giou <= 1
                 tot_giou += all_ious.giou;
-                tot_giou_loss += 1 - all_ious.giou;
+                args->tot_giou_loss += 1 - all_ious.giou;

                 tot_diou += all_ious.diou;
                 tot_diou_loss += 1 - all_ious.diou;
@@ -656,16 +670,16 @@ void forward_yolo_layer(const layer l, network_state state)
 #ifndef GPU
     for (b = 0; b < l.batch; ++b) {
         for (n = 0; n < l.n; ++n) {
-            int index = entry_index(l, b, n*l.w*l.h, 0);
+            int bbox_index = entry_index(l, b, n*l.w*l.h, 0);
             if (l.new_coords) {
-                activate_array(l.output + index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
+                //activate_array(l.output + bbox_index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
             }
             else {
-                activate_array(l.output + index, 2 * l.w*l.h, LOGISTIC); // x,y,
+                activate_array(l.output + bbox_index, 2 * l.w*l.h, LOGISTIC); // x,y,
+                int obj_index = entry_index(l, b, n*l.w*l.h, 4);
+                activate_array(l.output + obj_index, (1 + l.classes)*l.w*l.h, LOGISTIC);
             }
-            scal_add_cpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale x,y
-            index = entry_index(l, b, n*l.w*l.h, 4);
-            activate_array(l.output + index, (1 + l.classes)*l.w*l.h, LOGISTIC);
+            scal_add_cpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + bbox_index, 1); // scale x,y
         }
     }
 #endif
@@ -708,6 +722,8 @@ void forward_yolo_layer(const layer l, network_state state)
         yolo_args[b].b = b;
         yolo_args[b].tot_iou = 0;
+        yolo_args[b].tot_iou_loss = 0;
+        yolo_args[b].tot_giou_loss = 0;
         yolo_args[b].count = 0;
         yolo_args[b].class_count = 0;

@@ -719,6 +735,8 @@ void forward_yolo_layer(const layer l, network_state state)
         pthread_join(threads[b], 0);

         tot_iou += yolo_args[b].tot_iou;
+        tot_iou_loss += yolo_args[b].tot_iou_loss;
+        tot_giou_loss += yolo_args[b].tot_giou_loss;
         count += yolo_args[b].count;
         class_count += yolo_args[b].class_count;
     }
@@ -728,24 +746,113 @@ void forward_yolo_layer(const layer l, network_state state)
     // Search for an equidistant point from the distant boundaries of the local minimum
     int iteration_num = get_current_iteration(state.net);
+    const int start_point = state.net.max_batches * 3 / 4;
     //printf(" equidistant_point ep = %d, it = %d \n", state.net.equidistant_point, iteration_num);
-    if (state.net.equidistant_point && state.net.equidistant_point < iteration_num) {
-        float progress_it = iteration_num - state.net.equidistant_point;
-        float progress = progress_it / (state.net.max_batches - state.net.equidistant_point);
-        float loss_threshold = (*state.net.delta_rolling_avg) * progress;
-        printf(" RUN equidistant_point loss_threshold = %f, ep = %d, it = %d \n", loss_threshold, state.net.equidistant_point, iteration_num);
+    if ((state.net.badlabels_rejection_percentage && start_point < iteration_num) ||
+        (state.net.num_sigmas_reject_badlabels && start_point < iteration_num) ||
+        (state.net.equidistant_point && state.net.equidistant_point < iteration_num))
+    {
+        const float progress_it = iteration_num - state.net.equidistant_point;
+        const float progress = progress_it / (state.net.max_batches - state.net.equidistant_point);
+        float ep_loss_threshold = (*state.net.delta_rolling_avg) * progress;
         float cur_max = 0;
+        float cur_avg = 0;
+        float counter = 0;
         for (i = 0; i < l.batch * l.outputs; ++i) {
-            if (cur_max < fabs(l.delta[i]))
-                cur_max = fabs(l.delta[i]);
-            if (fabs(l.delta[i]) < loss_threshold)
-                l.delta[i] = 0;
+            if (l.delta[i] != 0) {
+                counter++;
+                cur_avg += fabs(l.delta[i]);
+
+                if (cur_max < fabs(l.delta[i]))
+                    cur_max = fabs(l.delta[i]);
+            }
+        }
+
+        cur_avg = cur_avg / counter;
+
+        if (*state.net.delta_rolling_max == 0) *state.net.delta_rolling_max = cur_max;
+        *state.net.delta_rolling_max = *state.net.delta_rolling_max * 0.99 + cur_max * 0.01;
+        *state.net.delta_rolling_avg = *state.net.delta_rolling_avg * 0.99 + cur_avg * 0.01;
+
+        // reject high loss to filter bad labels
+        if (state.net.num_sigmas_reject_badlabels && start_point < iteration_num)
+        {
+            const float rolling_std = (*state.net.delta_rolling_std);
+            const float rolling_max = (*state.net.delta_rolling_max);
+            const float rolling_avg = (*state.net.delta_rolling_avg);
+            const float progress_badlabels = (float)(iteration_num - start_point) / (start_point);
+
+            float cur_std = 0;
+            float counter = 0;
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (l.delta[i] != 0) {
+                    counter++;
+                    cur_std += pow(l.delta[i] - rolling_avg, 2);
+                }
+            }
+            cur_std = sqrt(cur_std / counter);
+
+            *state.net.delta_rolling_std = *state.net.delta_rolling_std * 0.99 + cur_std * 0.01;
+
+            float final_badlebels_threshold = rolling_avg + rolling_std * state.net.num_sigmas_reject_badlabels;
+            float badlabels_threshold = rolling_max - progress_badlabels * fabs(rolling_max - final_badlebels_threshold);
+            badlabels_threshold = max_val_cmp(final_badlebels_threshold, badlabels_threshold);
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (fabs(l.delta[i]) > badlabels_threshold)
+                    l.delta[i] = 0;
+            }
+            printf(" rolling_std = %f, rolling_max = %f, rolling_avg = %f \n", rolling_std, rolling_max, rolling_avg);
+            printf(" badlabels loss_threshold = %f, start_it = %d, progress = %f \n", badlabels_threshold, start_point, progress_badlabels *100);
+
+            ep_loss_threshold = min_val_cmp(final_badlebels_threshold, rolling_avg) * progress;
+        }
+
+
+        // reject some percent of the highest deltas to filter bad labels
+        if (state.net.badlabels_rejection_percentage && start_point < iteration_num) {
+            if (*state.net.badlabels_reject_threshold == 0)
+                *state.net.badlabels_reject_threshold = *state.net.delta_rolling_max;
+
+            printf(" badlabels_reject_threshold = %f \n", *state.net.badlabels_reject_threshold);
+
+            const float num_deltas_per_anchor = (l.classes + 4 + 1);
+            float counter_reject = 0;
+            float counter_all = 0;
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (l.delta[i] != 0) {
+                    counter_all++;
+                    if (fabs(l.delta[i]) > (*state.net.badlabels_reject_threshold)) {
+                        counter_reject++;
+                        l.delta[i] = 0;
+                    }
+                }
+            }
+            float cur_percent = 100 * (counter_reject*num_deltas_per_anchor / counter_all);
+            if (cur_percent > state.net.badlabels_rejection_percentage) {
+                *state.net.badlabels_reject_threshold += 0.01;
+                printf(" increase!!! \n");
+            }
+            else if (*state.net.badlabels_reject_threshold > 0.01) {
+                *state.net.badlabels_reject_threshold -= 0.01;
+                printf(" decrease!!! \n");
+            }
+
+            printf(" badlabels_reject_threshold = %f, cur_percent = %f, badlabels_rejection_percentage = %f, delta_rolling_max = %f \n",
+                *state.net.badlabels_reject_threshold, cur_percent, state.net.badlabels_rejection_percentage, *state.net.delta_rolling_max);
+        }
+
+
+        // reject low loss to find equidistant point
+        if (state.net.equidistant_point && state.net.equidistant_point < iteration_num) {
+            printf(" equidistant_point loss_threshold = %f, start_it = %d, progress = %3.1f %% \n", ep_loss_threshold, state.net.equidistant_point, progress * 100);
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (fabs(l.delta[i]) < ep_loss_threshold)
+                    l.delta[i] = 0;
+            }
         }
-
-        *state.net.delta_rolling_avg = *state.net.delta_rolling_avg * 0.99 + cur_max * 0.01;
     }

     if (count == 0) count = 1;
@@ -790,7 +897,7 @@ void forward_yolo_layer(const layer l, network_state state)
     float avg_iou_loss = 0;
     *(l.cost) = loss;
-    /*
+
     // gIOU loss + MSE (objectness) loss
     if (l.iou_loss == MSE) {
         *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
@@ -807,7 +914,7 @@ void forward_yolo_layer(const layer l, network_state state)
         }
         *(l.cost) = avg_iou_loss + classification_loss;
     }
-    */
+
     loss /= l.batch;
     classification_loss /= l.batch;

@@ -1068,20 +1175,21 @@ void forward_yolo_layer_gpu(const layer l, network_state state)
     int b, n;
     for (b = 0; b < l.batch; ++b){
         for(n = 0; n < l.n; ++n){
-            int index = entry_index(l, b, n*l.w*l.h, 0);
+            int bbox_index = entry_index(l, b, n*l.w*l.h, 0);
             // y = 1./(1. + exp(-x))
             // x = ln(y/(1-y))  // ln - natural logarithm (base = e)
             // if(y->1) x -> inf
             // if(y->0) x -> -inf
             if (l.new_coords) {
-                activate_array_ongpu(l.output_gpu + index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
+                //activate_array_ongpu(l.output_gpu + bbox_index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
             }
             else {
-                activate_array_ongpu(l.output_gpu + index, 2 * l.w*l.h, LOGISTIC); // x,y
+                activate_array_ongpu(l.output_gpu + bbox_index, 2 * l.w*l.h, LOGISTIC); // x,y
+
+                int obj_index = entry_index(l, b, n*l.w*l.h, 4);
+                activate_array_ongpu(l.output_gpu + obj_index, (1 + l.classes)*l.w*l.h, LOGISTIC); // classes and objectness
             }
-            if (l.scale_x_y != 1) scal_add_ongpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale x,y
-            index = entry_index(l, b, n*l.w*l.h, 4);
-            activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); // classes and objectness
+            if (l.scale_x_y != 1) scal_add_ongpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + bbox_index, 1); // scale x,y
         }
     }
     if(!state.train || l.onlyforward){
diff --git a/src/yolo_layer.h b/src/yolo_layer.h
index 8de44b45517..08883b0f244 100644
--- a/src/yolo_layer.h
+++ b/src/yolo_layer.h
@@ -20,7 +20,7 @@ void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth

 #ifdef GPU
 void forward_yolo_layer_gpu(const layer l, network_state state);
-void backward_yolo_layer_gpu(layer l, network_state state);
+void backward_yolo_layer_gpu(const layer l, network_state state);
 #endif

 #ifdef __cplusplus

From 4fddf7c9457c64af65856d7de7bf0a40e4ada8fb Mon Sep 17 00:00:00 2001
From: edwardxliu
Date: Tue, 15 Dec 2020 09:29:42 +0800
Subject: [PATCH 14/20] update

---
 Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 743404c1ce2..a089031f779 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,14 @@
-GPU=1
-CUDNN=1
+GPU=0
+CUDNN=0
 CUDNN_HALF=0
-OPENCV=1
+OPENCV=0
 AVX=0
 OPENMP=0
-LIBSO=1
+LIBSO=0
 ZED_CAMERA=0
 ZED_CAMERA_v2_8=0
-STREAM=1
-FFMPEG=1
+STREAM=0
+FFMPEG=0 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -17,7 +17,7 @@ FFMPEG=1 # set ZED_CAMERA_v2_8=1 to enable ZED SDK 2.X USE_CPP=0 -DEBUG=1 +DEBUG=0 ARCH= -gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_50,code=[sm_50,compute_50] \ From b141cef077c79e63bbc21587622b67a8f5244b88 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 09:47:11 +0800 Subject: [PATCH 15/20] minor fix --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a089031f779..85925cd238b 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS=-L/usr/local/lib -lm -pthread +LDFLAGS= -lm -pthread COMMON= -Iinclude/ -I3rdparty/stb/include CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -fpermissive @@ -97,7 +97,7 @@ COMMON+= `pkg-config --cflags libswresample libswscale libavutil libavcodec liba endif ifeq ($(DEBUG), 1) -OPTS= -O0 -g +#OPTS= -O0 -g #OPTS= -Og -g COMMON+= -DDEBUG CFLAGS+= -DDEBUG From 3298cbe60e132df17e1496a7477295cfa69fbda0 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 10:26:05 +0800 Subject: [PATCH 16/20] update --- .github/workflows/ccpp.yml | 1 + .github/workflows/ccpp.yml.bk | 594 ++++++++++++++++++++++++++++++++++ 2 files changed, 595 insertions(+) create mode 100644 .github/workflows/ccpp.yml.bk diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 60482a5a355..3f3cd4b062a 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -82,6 +82,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true' vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_linux.diff uses: lukka/run-vcpkg@v2 with: diff --git a/.github/workflows/ccpp.yml.bk b/.github/workflows/ccpp.yml.bk new file mode 100644 index 00000000000..60482a5a355 --- /dev/null +++ b/.github/workflows/ccpp.yml.bk @@ -0,0 +1,594 @@ +name: Darknet Continuous Integration + +on: [push, pull_request] + +jobs: + ubuntu-makefile: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - name: 'Install CUDA' + run: | + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub + sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt update + sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 + sudo apt-get install -y --no-install-recommends libcudnn7-dev + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/libcuda.so + sudo ln -s /usr/local/cuda-10.2 /usr/local/cuda + export PATH=/usr/local/cuda/bin:$PATH + export 
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + nvcc --version + gcc --version + + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1 -j 8 + make clean + + + ubuntu-vcpkg: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_linux.diff + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeBuildType: 'Release' + cmakeAppendedArgs: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release" + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - 
uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu-cuda: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + env: + CUDACXX: "/usr/local/cuda-10.2/bin/nvcc" + CUDA_PATH: "/usr/local/cuda-10.2" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda-10.2" + LD_LIBRARY_PATH: "/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/lib64/stubs:$LD_LIBRARY_PATH" + run: | + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub + sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt update + sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 + sudo apt-get install -y --no-install-recommends libcudnn7-dev + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/stubs/libcuda.so.1 + + - name: 'Build with CMake and Ninja' + env: + CUDACXX: "/usr/local/cuda-10.2/bin/nvcc" + CUDA_PATH: "/usr/local/cuda-10.2" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda-10.2" + LD_LIBRARY_PATH: "/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/lib64/stubs:$LD_LIBRARY_PATH" + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu-no-ocv-cpp: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release -DBUILD_AS_CPP:BOOL=TRUE" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + + osx-vcpkg: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install libomp yasm + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ 
github.workspace }}/cmake/vcpkg_osx.diff + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeBuildType: 'Release' + cmakeAppendedArgs: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release" + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + osx: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install opencv libomp + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + osx-no-ocv-no-omp-cpp: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release -DBUILD_AS_CPP:BOOL=TRUE" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + + win-vcpkg: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows.diff + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + 
buildWithCMakeArgs: '--config Release --target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ runner.workspace }}/buildDirectory/Release/*.dll + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + win-vcpkg-cuda: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: 'Install CUDA' + run: | + choco install cuda --version=10.2.89.20191206 -y + $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows_cuda.diff + CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDACXX: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + env: + CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDACXX: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ runner.workspace }}/buildDirectory/Release/*.dll + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + win-integrated-libs: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: 
"-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/3rdparty/pthreads/bin/*.dll + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + win-intlibs-cpp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release -DBUILD_AS_CPP:BOOL=TRUE" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + + win-intlibs-cuda: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: 'Install CUDA' + run: | + choco install cuda --version=10.2.89.20191206 -y + $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + env: + CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDACXX: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + + mingw: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-G\"MinGW Makefiles\" -DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + + cygwin: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Install Cygwin' + run: | + choco install cygwin -y + choco install cyg-get -y + cyg-get gcc-g++ cmake make libopencv-devel libncurses-devel + + - name: 'Build' + run: | + mkdir buildDirectory + cd buildDirectory + path C:\tools\cygwin\bin + bash -c 'cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE="Release"' + bash -c 'cmake --build . 
--target install -- -j8' + shell: cmd From 2067e3873f4277097763d63e658a134680a0570c Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 12:25:12 +0800 Subject: [PATCH 17/20] update --- .github/workflows/ccpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 3f3cd4b062a..6f40067f4c2 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -82,7 +82,7 @@ jobs: - name: Restore from cache and run vcpkg env: - ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true' + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_linux.diff uses: lukka/run-vcpkg@v2 with: From 25ffaf4ce49ae0fb657a089a0ccfa128b6ae68c7 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 12:31:13 +0800 Subject: [PATCH 18/20] fix set-env issue --- .github/workflows/ccpp.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 6f40067f4c2..cf383f6b169 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -255,6 +255,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_osx.diff uses: lukka/run-vcpkg@v2 with: @@ -359,6 +360,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows.diff uses: lukka/run-vcpkg@v2 with: @@ -415,6 +417,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows_cuda.diff CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" From 22a44da9f8d01b39f54e6e36dc4d8ff7d09cff6f Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 12:49:19 +0800 Subject: [PATCH 19/20] fix set-env issue --- .github/workflows/ccpp.yml | 2 ++ Makefile | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index cf383f6b169..085507fd319 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -370,6 +370,8 @@ jobs: appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} - name: 'Build with CMake and Ninja' + env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true uses: lukka/run-cmake@v2 with: cmakeListsOrSettingsJson: CMakeListsTxtAdvanced diff --git a/Makefile b/Makefile index 85925cd238b..f42109ba1ce 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -GPU=0 -CUDNN=0 +GPU=1 +CUDNN=1 CUDNN_HALF=0 -OPENCV=0 +OPENCV=1 AVX=0 OPENMP=0 LIBSO=0 From b1538bfba1e37f81709f09695ebec0fb56941a52 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 13:55:02 +0800 Subject: [PATCH 20/20] fix set-env issue --- .github/workflows/ccpp.yml | 1 + Makefile | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 085507fd319..ba696defcee 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -435,6 +435,7 @@ jobs: - name: 'Build with CMake and Ninja' env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing 
Toolkit\\CUDA\\v10.2"
diff --git a/Makefile b/Makefile
index f42109ba1ce..85925cd238b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
-GPU=1
-CUDNN=1
+GPU=0
+CUDNN=0
 CUDNN_HALF=0
-OPENCV=1
+OPENCV=0
 AVX=0
 OPENMP=0
 LIBSO=0
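
Note on the EMA additions earlier in this series (the ema_alpha option in parser.c, the ema_update()/ema_apply() declarations in network.h, and save_convolutional_weights_ema()): they follow the standard exponential-moving-average scheme for model weights. The snippet below is only a rough sketch of that idea, using hypothetical names; it is not the code added by these patches.

    #include <stddef.h>

    /* Illustrative EMA update: shadow[] tracks a smoothed copy of the live
     * weights w[]. With alpha close to 1 (e.g. 0.9998) the shadow weights
     * change slowly; saving shadow[] instead of w[] at checkpoint time is
     * what the save_ema path of save_weights_upto() enables above. */
    static void ema_update_sketch(float *shadow, const float *w, size_t n, float alpha)
    {
        for (size_t i = 0; i < n; ++i) {
            shadow[i] = alpha * shadow[i] + (1.0f - alpha) * w[i];
        }
    }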