From f31c6bab113bdc39a629bbae6e9897de94a346d5 Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Thu, 24 Sep 2020 17:53:59 +0800 Subject: [PATCH 01/20] add RTMP stream function with FFMPEG --- Makefile | 13 +- src/detector.c | 23 +- src/stream.cpp | 580 +++++++++++++++++++++++++++++++++++++++++++++++ src/stream.h | 18 ++ src/streamer.cpp | 262 +++++++++++++++++++++ src/streamer.hpp | 199 ++++++++++++++++ 6 files changed, 1085 insertions(+), 10 deletions(-) create mode 100644 src/stream.cpp create mode 100644 src/stream.h create mode 100644 src/streamer.cpp create mode 100644 src/streamer.hpp diff --git a/Makefile b/Makefile index e6a4ad73884..102ea08d6c8 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ APPNAMESO=uselib endif ifeq ($(USE_CPP), 1) -CC=g++ +CC=g++ -std=c++11 else CC=gcc endif @@ -68,9 +68,9 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS= -lm -pthread -COMMON= -Iinclude/ -I3rdparty/stb/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC +LDFLAGS= -lm -pthread -L/usr/local/lib -L/usr/local/Cellar/ffmpeg/4.1.3/lib -lavformat -lavcodec -lavutil -lswscale +COMMON= -Iinclude/ -I3rdparty/stb/include -I/usr/local/include -I/usr/local/Cellar/ffmpeg/4.1.3/include +CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas #-fPIC -lavformat -lavcodec -lavutil -lswscale ifeq ($(DEBUG), 1) #OPTS= -O0 -g @@ -143,7 +143,7 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=stream.o streamer.o image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o @@ -161,7 +161,8 @@ $(LIBNAMESO): $(OBJDIR) $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp $(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) 
src/yolo_v2_class.cpp -o $@ $(LDFLAGS) $(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp - $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) + #$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) + $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ $(LIBNAMESO) endif $(EXEC): $(OBJS) diff --git a/src/detector.c b/src/detector.c index 52511fb0dfb..4f05e0077ee 100644 --- a/src/detector.c +++ b/src/detector.c @@ -8,6 +8,7 @@ #include "box.h" #include "demo.h" #include "option_list.h" +#include "stream.h" #ifndef __COMPAR_FN_T #define __COMPAR_FN_T @@ -157,7 +158,6 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i args.mosaic_bound = net.mosaic_bound; args.contrastive = net.contrastive; args.contrastive_jit_flip = net.contrastive_jit_flip; - args.contrastive_color = net.contrastive_color; if (dont_show && show_imgs) show_imgs = 2; args.show_imgs = show_imgs; @@ -1965,6 +1965,15 @@ void run_detector(int argc, char **argv) int ext_output = find_arg(argc, argv, "-ext_output"); int save_labels = find_arg(argc, argv, "-save_labels"); char* chart_path = find_char_arg(argc, argv, "-chart", 0); + + int stream_bitrate = find_int_arg(argc, argv, "-stream_bitrate", 0); + int stream_frame_width = find_int_arg(argc, argv, "-stream_width", 0); + int stream_frame_height = find_int_arg(argc, argv, "-stream_height", 0); + int stream_gop_size = find_int_arg(argc, argv, "-stream_gop", 0); + int stream_fps = find_int_arg(argc, argv, "-stream_fps", 0); + char *stream_addr = find_char_arg(argc, argv, "-stream_address", 0); + char *stream_profile = find_char_arg(argc, argv, "-stream_profile", "high444"); + if (argc < 4) { fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -2012,7 +2021,7 @@ void run_detector(int argc, char **argv) int it_num = 100; draw_object(datacfg, cfg, weights, filename, thresh, dont_show, it_num, letter_box, benchmark_layers); } - else if (0 == strcmp(argv[2], "demo")) { + else if (0 == strcmp(argv[2], "demo") || 0 == strcmp(argv[2], "stream")) { list *options = read_data_cfg(datacfg); int classes = option_find_int(options, "classes", 20); char *name_list = option_find_str(options, "names", "data/names.list"); @@ -2020,9 +2029,15 @@ void run_detector(int argc, char **argv) if (filename) if (strlen(filename) > 0) if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; - demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, - mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); + if (0 == strcmp(argv[2], "demo")){ + demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, + mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); + }else if (0 == strcmp(argv[2], "stream")){ + stream(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, + mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers, + stream_bitrate, stream_addr, stream_frame_width, stream_frame_height, 
stream_profile, stream_gop_size, stream_fps); + } free_list_contents_kvp(options); free_list(options); } diff --git a/src/stream.cpp b/src/stream.cpp new file mode 100644 index 00000000000..0a60b2e61f1 --- /dev/null +++ b/src/stream.cpp @@ -0,0 +1,580 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +//#include "demo.h" +#include "darknet.h" +#ifdef WIN32 +#include +#include "gettimeofday.h" +#else +#include +#endif + +#ifdef OPENCV + +#include "http_stream.h" +#include "streamer.hpp" +#include "stream.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace streamer; +using time_point = std::chrono::high_resolution_clock::time_point; +using high_resolution_clock = std::chrono::high_resolution_clock; +using std::cerr; +using std::endl; + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static int nboxes = 0; +static detection *dets = NULL; + +static network net; +static image in_s ; +static image det_s; + +static cap_cv *cap; +static float fps = 0; +static float demo_thresh = 0; +static int demo_ext_output = 0; +static long long int frame_id = 0; +static int demo_json_port = -1; + + +static int avg_frames; +static int demo_index = 0; +static mat_cv** cv_images; + +mat_cv* in_img; +mat_cv* det_img; +mat_cv* show_img; + +static volatile int flag_exit; +static int letter_box = 0; + +static const int thread_wait_ms = 1; +static volatile int run_fetch_in_thread = 0; +static volatile int run_detect_in_thread = 0; + +class MovingAverage +{ + int size; + int pos; + bool crossed; + std::vector v; + +public: + explicit MovingAverage(int sz) + { + size = sz; + v.resize(size); + pos = 0; + crossed = false; + } + + void add_value(double value) + { + v[pos] = value; + pos++; + if(pos == size) { + pos = 0; + crossed = true; + } + } + + double get_average() + { + double avg = 0.0; + int last = crossed ? size : pos; + int k=0; + for(k=0;k min_plus_margin) { + size_t excess = streamed_frames - min_plus_margin; + double dexcess = excess; + + //add a delay ~ excess*processing_time +//#define SHOW_DELAY +#ifdef SHOW_DELAY + double delay = dexcess*avg_frame_time*1000000.0; + printf("frame %07lu adding delay %.4f\n", streamed_frames, delay); + printf("avg fps = %.2f\n", streamed_frames/elapsed); +#endif + usleep(dexcess*avg_frame_time*1000000.0); + } +} + +void process_frame(mat_cv *mat_ptr, cv::Mat &out) +{ + try{ + if (mat_ptr == NULL) return; + cv::Mat &mat = *(cv::Mat *)mat_ptr; + mat.copyTo(out); + }catch (...) 
{ + cerr << "OpenCV exception: process_frame \n"; + } +} + + +void stream_frame(Streamer &streamer, const cv::Mat &image) +{ + streamer.stream_frame(image.data); +} + + +void stream_frame(Streamer &streamer, const cv::Mat &image, int64_t frame_duration) +{ + streamer.stream_frame(image.data, frame_duration); +} + +void *fetch_in_thread(void *ptr) +{ + while (!custom_atomic_load_int(&flag_exit)) { + while (!custom_atomic_load_int(&run_fetch_in_thread)) { + if (custom_atomic_load_int(&flag_exit)) return 0; + this_thread_yield(); + } + int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream + if (letter_box) + in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); + else + in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); + if (!in_s.data) { + printf("Stream closed.\n"); + custom_atomic_store_int(&flag_exit, 1); + custom_atomic_store_int(&run_fetch_in_thread, 0); + //exit(EXIT_FAILURE); + return 0; + } + //in_s = resize_image(in, net.w, net.h); + + custom_atomic_store_int(&run_fetch_in_thread, 0); + } + return 0; +} + +void *fetch_in_thread_sync(void *ptr) +{ + custom_atomic_store_int(&run_fetch_in_thread, 1); + while (custom_atomic_load_int(&run_fetch_in_thread)) this_thread_sleep_for(thread_wait_ms); + return 0; +} + +void *detect_in_thread(void *ptr) +{ + while (!custom_atomic_load_int(&flag_exit)) { + while (!custom_atomic_load_int(&run_detect_in_thread)) { + if (custom_atomic_load_int(&flag_exit)) return 0; + this_thread_yield(); + } + + layer l = net.layers[net.n - 1]; + float *X = det_s.data; + float *prediction = network_predict(net, X); + + cv_images[demo_index] = det_img; + det_img = cv_images[(demo_index + avg_frames / 2 + 1) % avg_frames]; + demo_index = (demo_index + 1) % avg_frames; + + if (letter_box) + dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box + else + dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized + + custom_atomic_store_int(&run_detect_in_thread, 0); + } + + return 0; +} + +void *detect_in_thread_sync(void *ptr) +{ + custom_atomic_store_int(&run_detect_in_thread, 1); + while (custom_atomic_load_int(&run_detect_in_thread)) this_thread_sleep_for(thread_wait_ms); + return 0; +} + +double get_wall_time() +{ + struct timeval walltime; + if (gettimeofday(&walltime, NULL)) { + return 0; + } + return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001; +} +void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, + int benchmark, int benchmark_layers, + int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps) +{ + if (avgframes < 1) avgframes = 1; + avg_frames = avgframes; + letter_box = letter_box_in; + in_img = det_img = show_img = NULL; + //skip = frame_skip; + image **alphabet = load_alphabet(); + int delay = frame_skip; + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_ext_output = ext_output; + demo_json_port = json_port; + printf("Demo\n"); + net = 
parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 + if(weightfile){ + load_weights(&net, weightfile); + } + net.benchmark_layers = benchmark_layers; + fuse_conv_batchnorm(net); + calculate_binary_weights(net); + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = get_capture_video_stream(filename); + }else{ + printf("Webcam index: %d\n", cam_index); + cap = get_capture_webcam(cam_index); + } + + if (!cap) { +#ifdef WIN32 + printf("Check that you have copied file opencv_ffmpeg340_64.dll to the same directory where is darknet.exe \n"); +#endif + error("Couldn't connect to webcam.\n"); + } + + layer l = net.layers[net.n-1]; + int j; + + cv_images = (mat_cv**)xcalloc(avg_frames, sizeof(mat_cv)); + + int i; + for (i = 0; i < net.n; ++i) { + layer lc = net.layers[i]; + if (lc.type == YOLO) { + lc.mean_alpha = 1.0 / avg_frames; + l = lc; + } + } + + if (l.classes != demo_classes) { + printf("\n Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes); + getchar(); + exit(0); + } + + flag_exit = 0; + + custom_thread_t fetch_thread = NULL; + custom_thread_t detect_thread = NULL; + if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + + fetch_in_thread_sync(0); //fetch_in_thread(0); + det_img = in_img; + det_s = in_s; + + fetch_in_thread_sync(0); //fetch_in_thread(0); + detect_in_thread_sync(0); //fetch_in_thread(0); + det_img = in_img; + det_s = in_s; + + for (j = 0; j < avg_frames / 2; ++j) { + free_detections(dets, nboxes); + fetch_in_thread_sync(0); //fetch_in_thread(0); + detect_in_thread_sync(0); //fetch_in_thread(0); + det_img = in_img; + det_s = in_s; + } + + int count = 0; + if(!prefix && !dont_show){ + int full_screen = 0; + //create_window_cv("Demo", full_screen, 1352, 1013); + } + + + write_cv* output_video_writer = NULL; + if (out_filename && !flag_exit) + { + int src_fps = 25; + src_fps = get_stream_fps_cpp_cv(cap); + output_video_writer = + create_video_writer(out_filename, 'D', 'I', 'V', 'X', src_fps, get_width_mat(det_img), get_height_mat(det_img), 1); + + //'H', '2', '6', '4' + //'D', 'I', 'V', 'X' + //'M', 'J', 'P', 'G' + //'M', 'P', '4', 'V' + //'M', 'P', '4', '2' + //'X', 'V', 'I', 'D' + //'W', 'M', 'V', '2' + } + + int send_http_post_once = 0; + const double start_time_lim = get_time_point(); + double before = get_time_point(); + double start_time = get_time_point(); + float avg_fps = 0; + int frame_counter = 0; + int global_frame_counter = 0; + + Streamer streamer; + int src_frame_width = get_width_mat(det_img); + int src_frame_height = get_height_mat(det_img); + int src_fps = get_stream_fps_cpp_cv(cap); + printf("video info w = %d, h = %d, fps = %d\n", src_frame_width, src_frame_height, src_fps); + + if (!dst_stream_addr) { + fprintf(stderr, "Please input a valid stream address \n"); + exit(1); + } + if (!dst_frame_width) dst_frame_width = src_frame_width; + if (!dst_frame_height) dst_frame_height = src_frame_height; + if (!stream_bitrate) stream_bitrate = 500000; + if (!stream_fps) stream_fps = src_fps; + //if (!stream_profile) stream_profile = "high444"; + if (!stream_gop_size) stream_gop_size = 10; + + StreamerConfig streamer_config(src_frame_width, src_frame_height, + dst_frame_width, dst_frame_height, + stream_fps, stream_bitrate, stream_gop_size, stream_profile, dst_stream_addr); + + streamer.enable_av_debug_log(); + 
streamer.init(streamer_config); + printf("stream info w = %d, h = %d, fps = %d, bitrate = %d, profile = %s, gop = %d, address = %s\n", dst_frame_width, dst_frame_height, stream_fps, stream_bitrate, stream_profile, stream_gop_size, dst_stream_addr); + + size_t streamed_frames = 0; + + high_resolution_clock clk; + time_point time_start = clk.now(); + time_point time_prev = time_start; + + MovingAverage moving_average(10); + double avg_frame_time; + + cv::Mat proc_frame; + + time_point time_stop = clk.now(); + auto elapsed_time = std::chrono::duration_cast>(time_stop - time_start); + auto frame_time = std::chrono::duration_cast>(time_stop - time_prev); + + while(1){ + ++count; + { + const float nms = .45; // 0.4F + int local_nboxes = nboxes; + detection *local_dets = dets; + this_thread_yield(); + + if (!benchmark) custom_atomic_store_int(&run_fetch_in_thread, 1); // if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + custom_atomic_store_int(&run_detect_in_thread, 1); // if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + + //if (nms) do_nms_obj(local_dets, local_nboxes, l.classes, nms); // bad results + if (nms) { + if (l.nms_kind == DEFAULT_NMS) do_nms_sort(local_dets, local_nboxes, l.classes, nms); + else diounms_sort(local_dets, local_nboxes, l.classes, nms, l.nms_kind, l.beta_nms); + } + + if (l.embedding_size) set_track_id(local_dets, local_nboxes, demo_thresh, l.sim_thresh, l.track_ciou_norm, l.track_history_size, l.dets_for_track, l.dets_for_show); + + //printf("\033[2J"); + //printf("\033[1;1H"); + //printf("\nFPS:%.1f\n", fps); + printf("Objects:\n\n"); + + ++frame_id; + if (demo_json_port > 0) { + int timeout = 400000; + send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); + } + + //char *http_post_server = "webhook.site/898bbd9b-0ddd-49cf-b81d-1f56be98d870"; + if (http_post_host && !send_http_post_once) { + int timeout = 3; // 3 seconds + int http_post_port = 80; // 443 https, 80 http + if (send_http_post_request(http_post_host, http_post_port, filename, + local_dets, nboxes, classes, names, frame_id, ext_output, timeout)) + { + if (time_limit_sec > 0) send_http_post_once = 1; + } + } + + if (!benchmark && !dontdraw_bbox) draw_detections_cv_v3(show_img, local_dets, local_nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes, demo_ext_output); + free_detections(local_dets, local_nboxes); + + printf("\nFPS:%.1f \t AVG_FPS:%.1f\n", fps, avg_fps); + + if(!prefix){ + if (!dont_show) { + const int each_frame = max_val_cmp(1, avg_fps / 60); + if(global_frame_counter % each_frame == 0){ //show_image_mat(show_img, "Demo"); + process_frame(show_img, proc_frame); + if(!filename){ + stream_frame(streamer, proc_frame); + }else{ + stream_frame(streamer, proc_frame, frame_time.count()*streamer.inv_stream_timebase); + } + } + int c = wait_key_cv(1); + if (c == 10) { + if (frame_skip == 0) frame_skip = 60; + else if (frame_skip == 4) frame_skip = 0; + else if (frame_skip == 60) frame_skip = 4; + else frame_skip = 0; + } + else if (c == 27 || c == 1048603) // ESC - exit (OpenCV 2.x / 3.x) + { + flag_exit = 1; + } + } + }else{ + char buff[256]; + sprintf(buff, "%s_%08d.jpg", prefix, count); + if(show_img) save_cv_jpg(show_img, buff); + } + + // if you run it with param -mjpeg_port 8090 then open URL in your web-browser: http://localhost:8090 + if (mjpeg_port > 0 && show_img) { + int port = mjpeg_port; + int timeout = 400000; + int jpeg_quality = 40; // 1 - 
100 + send_mjpeg(show_img, port, timeout, jpeg_quality); + } + + // save video file + if (output_video_writer && show_img) { + write_frame_cv(output_video_writer, show_img); + printf("\n cvWriteFrame \n"); + } + + while (custom_atomic_load_int(&run_detect_in_thread)) { + if(avg_fps > 180) this_thread_yield(); + else this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0); + } + if (!benchmark) { + while (custom_atomic_load_int(&run_fetch_in_thread)) { + if (avg_fps > 180) this_thread_yield(); + else this_thread_sleep_for(thread_wait_ms); // custom_join(fetch_thread, 0); + } + free_image(det_s); + } + + if (time_limit_sec > 0 && (get_time_point() - start_time_lim)/1000000 > time_limit_sec) { + printf(" start_time_lim = %f, get_time_point() = %f, time spent = %f \n", start_time_lim, get_time_point(), get_time_point() - start_time_lim); + break; + } + + if (flag_exit == 1) break; + + if(delay == 0){ + if(!benchmark) release_mat(&show_img); + show_img = det_img; + } + det_img = in_img; + det_s = in_s; + } + --delay; + if(delay < 0){ + delay = frame_skip; + + //double after = get_wall_time(); + //float curr = 1./(after - before); + double after = get_time_point(); // more accurate time measurements + float curr = 1000000. / (after - before); + fps = fps*0.9 + curr*0.1; + before = after; + + float spent_time = (get_time_point() - start_time) / 1000000; + frame_counter++; + global_frame_counter++; + if (spent_time >= 3.0f) { + //printf(" spent_time = %f \n", spent_time); + avg_fps = frame_counter / spent_time; + frame_counter = 0; + start_time = get_time_point(); + } + } + time_stop = clk.now(); + elapsed_time = std::chrono::duration_cast>(time_stop - time_start); + frame_time = std::chrono::duration_cast>(time_stop - time_prev); + + streamed_frames++; + moving_average.add_value(frame_time.count()); + avg_frame_time = moving_average.get_average(); + add_delay(streamed_frames, stream_fps, elapsed_time.count(), avg_frame_time); + + //ok = video_capture.read(read_frame); + time_prev = time_stop; + + } + printf("input video stream closed. \n"); + if (output_video_writer) { + release_video_writer(&output_video_writer); + printf("output_video_writer closed. 
\n"); + } + + this_thread_sleep_for(thread_wait_ms); + + custom_join(detect_thread, 0); + custom_join(fetch_thread, 0); + + // free memory + free_image(in_s); + free_detections(dets, nboxes); + + demo_index = (avg_frames + demo_index - 1) % avg_frames; + for (j = 0; j < avg_frames; ++j) { + release_mat(&cv_images[j]); + } + free(cv_images); + + free_ptrs((void **)names, net.layers[net.n - 1].classes); + + const int nsize = 8; + for (j = 0; j < nsize; ++j) { + for (i = 32; i < 127; ++i) { + free_image(alphabet[j][i]); + } + free(alphabet[j]); + } + free(alphabet); + free_network(net); + //cudaProfilerStop(); +} +#else +void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, + int benchmark, int benchmark_layers, + int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif diff --git a/src/stream.h b/src/stream.h new file mode 100644 index 00000000000..df186f94d07 --- /dev/null +++ b/src/stream.h @@ -0,0 +1,18 @@ +#ifndef STREAM_H +#define STREAM_H + +#include "image.h" +#ifdef __cplusplus +extern "C" { +#endif + +void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, + int benchmark, int benchmark_layers, + int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/streamer.cpp b/src/streamer.cpp new file mode 100644 index 00000000000..bc1de65bce7 --- /dev/null +++ b/src/streamer.cpp @@ -0,0 +1,262 @@ +#include "streamer.hpp" + +#include +#include +#include +#include +#include + +#define __STDC_CONSTANT_MACROS +#ifdef _WIN32 +//Windows +extern "C" +{ +#include "libavformat/avformat.h" +#include "libavutil/mathematics.h" +#include "libavutil/time.h" +}; +#else +//Linux... 
+#ifdef __cplusplus +extern "C" +{ +#endif +#include +#include +#include +#ifdef __cplusplus +}; +#endif +#endif + + +namespace streamer +{ + +#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P + +static int encode_and_write_frame(AVCodecContext *codec_ctx, AVFormatContext *fmt_ctx, AVFrame *frame) +{ + AVPacket pkt = {0}; + av_init_packet(&pkt); + + int ret = avcodec_send_frame(codec_ctx, frame); + if (ret < 0) + { + fprintf(stderr, "Error sending frame to codec context!\n"); + return ret; + } + + ret = avcodec_receive_packet(codec_ctx, &pkt); + if (ret < 0) + { + fprintf(stderr, "Error receiving packet from codec context!\n" ); + return ret; + } + + av_interleaved_write_frame(fmt_ctx, &pkt); + av_packet_unref(&pkt); + + return 0; +} + + +static int set_options_and_open_encoder(AVFormatContext *fctx, AVStream *stream, AVCodecContext *codec_ctx, AVCodec *codec, + std::string codec_profile, double width, double height, + int fps, int bitrate, int gop_size, AVCodecID codec_id) +{ + const AVRational dst_fps = {fps, 1}; + + codec_ctx->codec_tag = 0; + codec_ctx->codec_id = codec_id; + codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO; + codec_ctx->width = width; + codec_ctx->height = height; + //codec_ctx->gop_size = 12; + codec_ctx->gop_size = gop_size; + codec_ctx->pix_fmt = STREAM_PIX_FMT; + codec_ctx->framerate = dst_fps; + codec_ctx->time_base = av_inv_q(dst_fps); + codec_ctx->bit_rate = bitrate; + if (fctx->oformat->flags & AVFMT_GLOBALHEADER) + { + codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + stream->time_base = codec_ctx->time_base; //will be set afterwards by avformat_write_header to 1/1000 + + int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx); + if (ret < 0) + { + fprintf(stderr, "Could not initialize stream codec parameters!\n"); + return 1; + } + + AVDictionary *codec_options = nullptr; + av_dict_set(&codec_options, "profile", codec_profile.c_str(), 0); + av_dict_set(&codec_options, "preset", "ultrafast", 0); + av_dict_set(&codec_options, "tune", "zerolatency", 0); + av_dict_set(&codec_options, "crf", "30", 0); + //av_dict_set(&codec_options, "g", "1", 0); + //av_dict_set(&codec_options, "ar", "44100", 0); + //av_dict_set(&codec_options, "strict", "-2", 0); + //av_dict_set(&codec_options, "-ac", "1", 0); + av_dict_set(&codec_options, "q", "10", 0); + + // open video encoder + ret = avcodec_open2(codec_ctx, codec, &codec_options); + if (ret < 0) + { + fprintf(stderr, "Could not open video encoder!\n"); + return 1; + } + av_dict_free(&codec_options); + return 0; +} + + +Streamer::Streamer() +{ + format_ctx = nullptr; + out_codec = nullptr; + out_stream = nullptr; + out_codec_ctx = nullptr; + rtmp_server_conn = false; + av_register_all(); + inv_stream_timebase = 30.0; + network_init_ok = !avformat_network_init(); +} + + +void Streamer::cleanup() +{ + if(out_codec_ctx) { + avcodec_close(out_codec_ctx); + avcodec_free_context(&out_codec_ctx); + } + + if(format_ctx) { + if(format_ctx->pb) { + avio_close(format_ctx->pb); + } + avformat_free_context(format_ctx); + format_ctx = nullptr; + } +} + + +Streamer::~Streamer() +{ + cleanup(); + avformat_network_deinit(); +} + + + +void Streamer::stream_frame(const uint8_t *data) +{ + if(can_stream()) { + const int stride[] = {static_cast(config.src_width*3)}; + sws_scale(scaler.ctx, &data, stride, 0, config.src_height, picture.frame->data, picture.frame->linesize); + picture.frame->pts += av_rescale_q(1, out_codec_ctx->time_base, out_stream->time_base); + encode_and_write_frame(out_codec_ctx, format_ctx, picture.frame); + } +} + + 
+void Streamer::stream_frame(const uint8_t *data, int64_t frame_duration) +{ + if(can_stream()) { + const int stride[] = {static_cast(config.src_width*3)}; + sws_scale(scaler.ctx, &data, stride, 0, config.src_height, picture.frame->data, picture.frame->linesize); + picture.frame->pts += frame_duration; //time of frame in milliseconds + encode_and_write_frame(out_codec_ctx, format_ctx, picture.frame); + } +} + + +void Streamer::enable_av_debug_log() +{ + //av_log_set_level(AV_LOG_DEBUG); + //av_log_set_level(AV_LOG_QUIET); + //av_log_set_level(AV_LOG_INFO); + av_log_set_level(AV_LOG_VERBOSE); + //av_log_set_level(AV_LOG_MAX_OFFSET); + //av_log_set_level(AV_LOG_TRACE); +} + + +int Streamer::init(const StreamerConfig &streamer_config) +{ + init_ok = false; + cleanup(); + + config = streamer_config; + + if(!network_init_ok) { + return 1; + } + + //initialize format context for output with flv and no filename + avformat_alloc_output_context2(&format_ctx, nullptr, "flv", nullptr); + if(!format_ctx) { + return 1; + } + + //AVIOContext for accessing the resource indicated by url + if (!(format_ctx->oformat->flags & AVFMT_NOFILE)) { + int avopen_ret = avio_open2(&format_ctx->pb, config.server.c_str(), + AVIO_FLAG_WRITE, nullptr, nullptr); + if (avopen_ret < 0) { + fprintf(stderr, "failed to open stream output context, stream will not work\n"); + return 1; + } + rtmp_server_conn = true; + } + + //use selected codec + AVCodecID codec_id = AV_CODEC_ID_H264; + out_codec = avcodec_find_encoder(codec_id); + if (!(out_codec)) { + fprintf(stderr, "Could not find encoder for '%s'\n", + avcodec_get_name(codec_id)); + return 1; + } + + out_stream = avformat_new_stream(format_ctx, out_codec); + if (!out_stream) { + fprintf(stderr, "Could not allocate stream\n"); + return 1; + } + + out_codec_ctx = avcodec_alloc_context3(out_codec); + + if(set_options_and_open_encoder(format_ctx, out_stream, out_codec_ctx, out_codec, config.profile, + config.dst_width, config.dst_height, config.fps, config.bitrate, config.gop_size, codec_id)) { + return 1; + } + + out_stream->codecpar->extradata_size = out_codec_ctx->extradata_size; + out_stream->codecpar->extradata = static_cast(av_mallocz(out_codec_ctx->extradata_size)); + memcpy(out_stream->codecpar->extradata, out_codec_ctx->extradata, out_codec_ctx->extradata_size); + + av_dump_format(format_ctx, 0, config.server.c_str(), 1); + + picture.init(out_codec_ctx->pix_fmt, config.dst_width, config.dst_height); + scaler.init(out_codec_ctx, config.src_width, config.src_height,config.dst_width, config.dst_height, SWS_BILINEAR); + + if (avformat_write_header(format_ctx, nullptr) < 0) + { + fprintf(stderr, "Could not write header!\n"); + return 1; + } + + printf("stream time base = %d / %d \n", out_stream->time_base.num, out_stream->time_base.den); + + inv_stream_timebase = (double)out_stream->time_base.den/(double)out_stream->time_base.num; + + init_ok = true; + return 0; +} + +} // namespace streamer diff --git a/src/streamer.hpp b/src/streamer.hpp new file mode 100644 index 00000000000..b3ae5f540d8 --- /dev/null +++ b/src/streamer.hpp @@ -0,0 +1,199 @@ +#ifndef STREAMER_HPP +#define STREAMER_HPP + +#ifdef _WIN32 +//Windows +extern "C" { +#include "libavutil/opt.h" +#include "libavcodec/avcodec.h" +#include "libavutil/channel_layout.h" +#include "libavutil/common.h" +#include "libavutil/imgutils.h" +#include "libavutil/mathematics.h" +#include "libavutil/samplefmt.h" + +#include "libavformat/avformat.h" +#include "libavcodec/avcodec.h" +#include "libavutil/imgutils.h" +#include 
"libswscale/swscale.h" +}; +#else +//Linux... +#ifdef __cplusplus +extern "C" { +#endif +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#ifdef __cplusplus +}; +#endif +#endif + +#include + + +namespace streamer +{ + + +class Scaler +{ +public: + SwsContext *ctx; + + Scaler() + { + ctx = nullptr; + } + + ~Scaler() + { + if(ctx) { + sws_freeContext(ctx); + } + } + + int init(AVCodecContext *codec_ctx, int src_width, int src_height, int dst_width, int dst_height, int flags) + { + ctx = sws_getContext(src_width, src_height, AV_PIX_FMT_BGR24, dst_width, dst_height, + codec_ctx->pix_fmt, flags, nullptr, nullptr, nullptr); + if(!ctx) { + fprintf(stderr, "Could not initialize sample scaler!\n"); + return 1; + } + return 0; + } +}; + + + +class Picture +{ + static const int align_frame_buffer = 32; +public: + + AVFrame *frame; + uint8_t *data; + + int init(enum AVPixelFormat pix_fmt, int width, int height) + { + frame = nullptr; + data = nullptr; + frame = av_frame_alloc(); + + int sz = av_image_get_buffer_size(pix_fmt, width, height, align_frame_buffer); + int ret = posix_memalign(reinterpret_cast(&data), align_frame_buffer, sz); + + av_image_fill_arrays(frame->data, frame->linesize, data, pix_fmt, width, height, align_frame_buffer); + frame->format = pix_fmt; + frame->width = width; + frame->height = height; + + return ret; + } + + Picture() + { + frame = nullptr; + data = nullptr; + } + + + ~Picture() + { + if(data) { + free(data); + data = nullptr; + } + + if(frame) { + av_frame_free(&frame); + } + } +}; + + +struct StreamerConfig +{ + int src_width; + int src_height; + int dst_width; + int dst_height; + int fps; + int bitrate; + int gop_size; + std::string profile; + std::string server; + + StreamerConfig() + { + dst_width = 0; + dst_height = 0; + src_width = 0; + src_height = 0; + fps = 0; + bitrate = 0; + gop_size = 12; + } + + StreamerConfig(int source_width, int source_height, int stream_width, int stream_height, int stream_fps, int stream_bitrate, int stream_gop_size, + const std::string &stream_profile, + const std::string &stream_server) + { + src_width = source_width; + src_height = source_height; + dst_width = stream_width; + dst_height = stream_height; + fps = stream_fps; + bitrate = stream_bitrate; + gop_size = stream_gop_size; + profile = stream_profile; + server = stream_server; + } +}; + + +class Streamer +{ + bool network_init_ok; + bool rtmp_server_conn; + bool init_ok; + + AVFormatContext *format_ctx; + AVCodec *out_codec; + AVStream *out_stream; + AVCodecContext *out_codec_ctx; + + Scaler scaler; + Picture picture; + + void cleanup(); + bool can_stream() + { + return network_init_ok && rtmp_server_conn && init_ok; + } + +public: + double inv_stream_timebase; + StreamerConfig config; + Streamer(); + ~Streamer(); + void enable_av_debug_log(); + int init(const StreamerConfig &streamer_config); + void stream_frame(const uint8_t *data); + void stream_frame(const uint8_t *data, int64_t frame_duration); + +}; + +} // namespace streamer +#endif From f24a5fe38ce29295ba9a125738a41086330cd0e1 Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Fri, 25 Sep 2020 10:19:41 +0800 Subject: [PATCH 02/20] add RTMP stream function with FFMPEG --- Makefile | 4 ++-- src/stream.cpp | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 102ea08d6c8..d2af5a0312c 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ GPU=0 CUDNN=0 CUDNN_HALF=0 -OPENCV=0 +OPENCV=1 AVX=0 OPENMP=0 
-LIBSO=0 +LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 diff --git a/src/stream.cpp b/src/stream.cpp index 0a60b2e61f1..ff68d02d12b 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -245,7 +245,7 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in demo_thresh = thresh; demo_ext_output = ext_output; demo_json_port = json_port; - printf("Demo\n"); + printf("Stream\n"); net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 if(weightfile){ load_weights(&net, weightfile); @@ -315,11 +315,12 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in } int count = 0; + /* if(!prefix && !dont_show){ int full_screen = 0; //create_window_cv("Demo", full_screen, 1352, 1013); } - + */ write_cv* output_video_writer = NULL; if (out_filename && !flag_exit) @@ -575,6 +576,6 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in int benchmark, int benchmark_layers, int stream_bitrate, char *dst_stream_addr, int dst_frame_width, int dst_frame_height, char *stream_profile, int stream_gop_size, int stream_fps) { - fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); + fprintf(stderr, "Stream needs OpenCV for webcam images.\n"); } #endif From 8437b6cb8f97083c4f24de29e0aa620e2c3c2b32 Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Fri, 25 Sep 2020 11:52:02 +0800 Subject: [PATCH 03/20] add RTMP stream function with FFMPEG --- Makefile | 15 +++++++++++++-- src/detector.c | 8 +++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index d2af5a0312c..485e8c4e58b 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ OPENMP=0 LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 +STREAM=1 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -60,7 +61,7 @@ APPNAMESO=uselib endif ifeq ($(USE_CPP), 1) -CC=g++ -std=c++11 +CC=g++ else CC=gcc endif @@ -72,6 +73,13 @@ LDFLAGS= -lm -pthread -L/usr/local/lib -L/usr/local/Cellar/ffmpeg/4.1.3/lib -lav COMMON= -Iinclude/ -I3rdparty/stb/include -I/usr/local/include -I/usr/local/Cellar/ffmpeg/4.1.3/include CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas #-fPIC -lavformat -lavcodec -lavutil -lswscale +ifeq ($(STREAM), 1) +COMMON+= -DSTREAM +CFLAGS+= -DSTREAM +LDFLAGS+= `pkg-config --libs libavformat libavcodec libavutil libswscale 2>/dev/null` +COMMON+= `pkg-config --cflags libavformat libavcodec libavutil libswscale 2>/dev/null` +endif + ifeq ($(DEBUG), 1) #OPTS= -O0 -g #OPTS= -Og -g @@ -143,11 +151,14 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=stream.o streamer.o image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=image_opencv.o http_stream.o gemm.o utils.o 
dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o endif +ifeq ($(STREAM), 1) +OBJ+=stream.o streamer.o +endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) DEPS = $(wildcard src/*.h) Makefile include/darknet.h diff --git a/src/detector.c b/src/detector.c index 4f05e0077ee..4e0ae6fd06d 100644 --- a/src/detector.c +++ b/src/detector.c @@ -8,7 +8,10 @@ #include "box.h" #include "demo.h" #include "option_list.h" + +#ifdef STREAM #include "stream.h" +#endif #ifndef __COMPAR_FN_T #define __COMPAR_FN_T @@ -2033,11 +2036,14 @@ void run_detector(int argc, char **argv) if (0 == strcmp(argv[2], "demo")){ demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); - }else if (0 == strcmp(argv[2], "stream")){ + } +#ifdef STREAM + if (0 == strcmp(argv[2], "stream")){ stream(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers, stream_bitrate, stream_addr, stream_frame_width, stream_frame_height, stream_profile, stream_gop_size, stream_fps); } +#endif free_list_contents_kvp(options); free_list(options); } From 9009113daa81ae739fc1363e7a4e00a22f81a67c Mon Sep 17 00:00:00 2001 From: edward <414252595@qq.com> Date: Fri, 25 Sep 2020 11:54:40 +0800 Subject: [PATCH 04/20] minor fix --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 485e8c4e58b..484d9bff74b 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,13 @@ GPU=0 CUDNN=0 CUDNN_HALF=0 -OPENCV=1 +OPENCV=0 AVX=0 OPENMP=0 -LIBSO=1 +LIBSO=0 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 -STREAM=1 +STREAM=0 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher From 0c0befc96f89e75adf0daa696ddc996298855087 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 28 Sep 2020 11:49:07 +0800 Subject: [PATCH 05/20] minor fix --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 484d9bff74b..46d88624add 100644 --- a/Makefile +++ b/Makefile @@ -172,8 +172,7 @@ $(LIBNAMESO): $(OBJDIR) $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp $(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ 
$(LDFLAGS) $(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp - #$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) - $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ $(LIBNAMESO) + $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) endif $(EXEC): $(OBJS) From 3aef7cfb2abec7b198a0f47ad73908513dc35112 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 28 Sep 2020 13:59:58 +0800 Subject: [PATCH 06/20] minor fix --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 46d88624add..dc0cd56a088 100644 --- a/Makefile +++ b/Makefile @@ -69,9 +69,9 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS= -lm -pthread -L/usr/local/lib -L/usr/local/Cellar/ffmpeg/4.1.3/lib -lavformat -lavcodec -lavutil -lswscale -COMMON= -Iinclude/ -I3rdparty/stb/include -I/usr/local/include -I/usr/local/Cellar/ffmpeg/4.1.3/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas #-fPIC -lavformat -lavcodec -lavutil -lswscale +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -I3rdparty/stb/include +CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC ifeq ($(STREAM), 1) COMMON+= -DSTREAM From 0d0225faaef18eef30b3ab67386fce6f7eb4bb5f Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 28 Sep 2020 18:33:19 +0800 Subject: [PATCH 07/20] add reference --- src/streamer.cpp | 1 + src/streamer.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/streamer.cpp b/src/streamer.cpp index bc1de65bce7..615352ecb4c 100644 --- a/src/streamer.cpp +++ b/src/streamer.cpp @@ -1,3 +1,4 @@ +//from https://github.com/andreanobile/opencv_ffmpeg_streaming/ #include "streamer.hpp" #include diff --git a/src/streamer.hpp b/src/streamer.hpp index b3ae5f540d8..377cbd193d7 100644 --- a/src/streamer.hpp +++ b/src/streamer.hpp @@ -1,3 +1,4 @@ +// from https://github.com/andreanobile/opencv_ffmpeg_streaming #ifndef STREAMER_HPP #define STREAMER_HPP From 7703fac445d7534b9940492b1a84fc8aa4fa37a5 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 8 Dec 2020 15:43:58 +0800 Subject: [PATCH 08/20] reduce latency of input stream from IP camera --- Makefile | 27 +++-- src/demo.c | 29 +++++- src/detector.c | 4 + src/image_ffmpeg.cpp | 240 +++++++++++++++++++++++++++++++++++++++++++ src/image_ffmpeg.h | 24 +++++ src/stream.cpp | 26 ++++- 6 files changed, 334 insertions(+), 16 deletions(-) create mode 100644 src/image_ffmpeg.cpp create mode 100644 src/image_ffmpeg.h diff --git a/Makefile b/Makefile index dc4b9cb9102..886df70cf8a 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,14 @@ -GPU=0 -CUDNN=0 +GPU=1 +CUDNN=1 CUDNN_HALF=0 -OPENCV=0 +OPENCV=1 AVX=0 OPENMP=0 -LIBSO=0 +LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 STREAM=0 +FFMPEG=1 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -16,7 +17,7 @@ STREAM=0 # set ZED_CAMERA_v2_8=1 to enable ZED SDK 2.X USE_CPP=0 -DEBUG=0 +DEBUG=1 ARCH= -gencode arch=compute_30,code=sm_30 \ -gencode arch=compute_35,code=sm_35 \ @@ -72,9 +73,9 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS= -lm -pthread +LDFLAGS=-L/usr/local/lib -lm -pthread COMMON= -Iinclude/ -I3rdparty/stb/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC +CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -fpermissive ifeq 
($(STREAM), 1) COMMON+= -DSTREAM @@ -83,8 +84,15 @@ LDFLAGS+= `pkg-config --libs libavformat libavcodec libavutil libswscale 2>/dev/ COMMON+= `pkg-config --cflags libavformat libavcodec libavutil libswscale 2>/dev/null` endif +ifeq ($(FFMPEG), 1) +COMMON+= -DFFMPEG +CFLAGS+= -DFFMPEG +LDFLAGS+= `pkg-config --libs libswresample libswscale libavutil libavcodec libavformat 2>/dev/null` +COMMON+= `pkg-config --cflags libswresample libswscale libavutil libavcodec libavformat 2>/dev/null` +endif + ifeq ($(DEBUG), 1) -#OPTS= -O0 -g +OPTS= -O0 -g #OPTS= -Og -g COMMON+= -DDEBUG CFLAGS+= -DDEBUG @@ -162,6 +170,9 @@ endif ifeq ($(STREAM), 1) OBJ+=stream.o streamer.o endif +ifeq ($(FFMPEG), 1) +OBJ+=image_ffmpeg.o +endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) DEPS = $(wildcard src/*.h) Makefile include/darknet.h diff --git a/src/demo.c b/src/demo.c index 604b6990bcf..c7bf4a5debe 100644 --- a/src/demo.c +++ b/src/demo.c @@ -15,6 +15,10 @@ #include #endif +#ifdef FFMPEG +#include "image_ffmpeg.h" +#endif + #ifdef OPENCV #include "http_stream.h" @@ -53,6 +57,9 @@ static const int thread_wait_ms = 1; static volatile int run_fetch_in_thread = 0; static volatile int run_detect_in_thread = 0; +#ifdef FFMPEG +static int input_is_stream = 0; +#endif void *fetch_in_thread(void *ptr) { @@ -62,10 +69,16 @@ void *fetch_in_thread(void *ptr) this_thread_yield(); } int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream - if (letter_box) + if (letter_box){ in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); - else + }else{ +#ifdef FFMPEG + if (input_is_stream) in_s = get_image_from_ffmpeg_stream_resize(&in_img, net.w, net.h, net.c); + else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#endif + } if (!in_s.data) { printf("Stream closed.\n"); custom_atomic_store_int(&flag_exit, 1); @@ -107,13 +120,13 @@ void *detect_in_thread(void *ptr) dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box else dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized - + //const float nms = .45; //if (nms) { // if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); // else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); //} - + custom_atomic_store_int(&run_detect_in_thread, 0); } @@ -165,6 +178,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(filename){ printf("video file: %s\n", filename); +#ifdef FFMPEG + open_video_stream(filename); + input_is_stream = 1; +#endif cap = get_capture_video_stream(filename); }else{ printf("Webcam index: %d\n", cam_index); @@ -335,7 +352,6 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int write_frame_cv(output_video_writer, show_img); printf("\n cvWriteFrame \n"); } - while (custom_atomic_load_int(&run_detect_in_thread)) { if(avg_fps > 180) this_thread_yield(); else this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0); @@ -383,6 +399,9 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int start_time = get_time_point(); } } +#ifdef FFMPEG + av_pkt_unref(); +#endif } printf("input video stream closed. 
\n"); if (output_video_writer) { diff --git a/src/detector.c b/src/detector.c index fecd4a500e0..88a04acecb7 100644 --- a/src/detector.c +++ b/src/detector.c @@ -13,6 +13,10 @@ #include "stream.h" #endif +#ifdef FFMPEG +#include "image_ffmpeg.h" +#endif + #ifndef __COMPAR_FN_T #define __COMPAR_FN_T typedef int (*__compar_fn_t)(const void*, const void*); diff --git a/src/image_ffmpeg.cpp b/src/image_ffmpeg.cpp new file mode 100644 index 00000000000..03f33dcf5eb --- /dev/null +++ b/src/image_ffmpeg.cpp @@ -0,0 +1,240 @@ +#include "image.h" +#include +#include +#include "darknet.h" + +#include "image_opencv.h" +#include "image_ffmpeg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "libavcodec/avcodec.h" +#include "libavdevice/avdevice.h" +#include "libavformat/avformat.h" +#include "libavfilter/avfilter.h" +#include "libavutil/avutil.h" +#include "libavutil/time.h" +#include "libswscale/swscale.h" +#include "libavutil/pixdesc.h" + +#ifdef __cplusplus +} +#endif + +#pragma comment(lib, "avformat.lib") +#pragma comment(lib, "avdevice.lib") +#pragma comment(lib, "avcodec.lib") +#pragma comment(lib, "avutil.lib") +#pragma comment(lib, "avfilter.lib") +#pragma comment(lib, "swscale.lib") +#pragma comment(lib, "swresample.lib") +#pragma comment(lib, "postproc.lib") + +using namespace std; +using namespace cv; + +image mat_to_image(cv::Mat mat) +{ + int w = mat.cols; + int h = mat.rows; + int c = mat.channels(); + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)mat.data; + int step = mat.step; + for (int y = 0; y < h; ++y) { + for (int k = 0; k < c; ++k) { + for (int x = 0; x < w; ++x) { + //uint8_t val = mat.ptr(y)[c * x + k]; + //uint8_t val = mat.at(y, x).val[k]; + //im.data[k*w*h + y*w + x] = val / 255.0f; + im.data[k*w*h + y*w + x] = data[y*step + x*c + k] / 255.0f; + } + } + } + return im; +} + +#ifdef __cplusplus +extern "C" { +#endif + +bool nRestart = false; +AVFormatContext *ifmt_ctx = NULL; +AVStream *pVst; +AVCodecContext *pVideoCodecCtx = NULL; +AVCodec *pVideoCodec = NULL; +IplImage *pRgbImg; +AVFrame *pFrame = av_frame_alloc(); +int got_picture; +//uint8_t* buffer_rgb = NULL; +AVFrame *pFrameRGB = av_frame_alloc(); +SwsContext *img_convert_ctx = NULL; +//cv::Mat Img; +AVDictionary *optionsDict = NULL; +char errbuf[64]; +unsigned int i; +AVStream *st = NULL; +AVPacket pkt; +int video_st_index = -1; +int audio_st_index = -1; +int ret; + +#ifdef FFMPEG +void close_stream() +{ + if (NULL != ifmt_ctx) { + avformat_close_input(&ifmt_ctx); + ifmt_ctx = NULL; + } + exit(0); +} + +extern "C" void av_pkt_unref() +{ + av_packet_unref(&pkt); +} + +extern "C" void open_video_stream(const char *filename) +{ + av_register_all(); // Register all codecs and formats so that they can be used. + avformat_network_init(); // Initialization of network components + av_dict_set(&optionsDict, "rtsp_transport", "tcp", 0); //采用tcp传输 ,,如果不设置这个有些rtsp流就会卡着 + av_dict_set(&optionsDict, "stimeout", "2000000", 0); //如果没有设置stimeout + + av_init_packet(&pkt); // initialize packet. + pkt.data = NULL; + pkt.size = 0; + + if ((ret = avformat_open_input(&ifmt_ctx, filename, 0, &optionsDict)) < 0) { // Open the input file for reading. + printf("Could not open input file '%s' (error '%s')\n", filename, av_make_error_string(errbuf, sizeof(errbuf), ret)); + close_stream(); + } + + if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) { // Get information on the input file (number of streams etc.). 
+ printf("Could not open find stream info (error '%s')\n", av_make_error_string(errbuf, sizeof(errbuf), ret)); + close_stream(); + } + + for (i = 0; i < ifmt_ctx->nb_streams; i++) { // dump information + av_dump_format(ifmt_ctx, i, filename, 0); + } + + for (i = 0; i < ifmt_ctx->nb_streams; i++) { // find video stream index + st = ifmt_ctx->streams[i]; + switch (st->codec->codec_type) { + case AVMEDIA_TYPE_AUDIO: audio_st_index = i; break; + case AVMEDIA_TYPE_VIDEO: video_st_index = i; break; + default: break; + } + } + if (-1 == video_st_index) { + printf("No H.264 video stream in the input file\n"); + close_stream(); + } +} + +extern "C" image get_image_from_ffmpeg_stream_resize(mat_cv** in_image, int w, int h, int c) +{ + cv::Mat *mat = NULL; + image empty_im = make_empty_image(0,0,0); + + //cv::Mat Img; + + do{ + ret = av_read_frame(ifmt_ctx, &pkt); // read frames + }while(ret == AVERROR(EAGAIN) || pkt.stream_index != video_st_index); + + if (ret < 0) { + printf("Could not read frame ---(error '%s')\n", av_make_error_string(errbuf, sizeof(errbuf), ret)); + close_stream(); + } + + if (pkt.stream_index == video_st_index) { // video frame + printf("Video Packet size = %d\n", pkt.size); + } + else if (pkt.stream_index == audio_st_index) { // audio frame + printf("Audio Packet size = %d\n", pkt.size); + } + else { + printf("Unknow Packet size = %d\n", pkt.size); + } + + //decode stream + if (!nRestart) + { + pVst = ifmt_ctx->streams[video_st_index]; + pVideoCodecCtx = pVst->codec; + pVideoCodec = avcodec_find_decoder(pVideoCodecCtx->codec_id); + if (pVideoCodec == NULL) + return empty_im; + //pVideoCodecCtx = avcodec_alloc_context3(pVideoCodec); + + if (avcodec_open2(pVideoCodecCtx, pVideoCodec, NULL) < 0) + return empty_im; + nRestart = true; + } + + if (pkt.stream_index == video_st_index) + { + fprintf(stdout, "pkt.size=%d,pkt.pts=%lld, pkt.data=0x%x.\n", pkt.size, pkt.pts, (unsigned int)pkt.data); + int av_result = avcodec_decode_video2(pVideoCodecCtx, pFrame, &got_picture, &pkt); + + if (got_picture) + { + fprintf(stdout, "decode one video frame!\n"); + } + + if (av_result < 0) + { + fprintf(stderr, "decode failed: inputbuf = 0x%x , input_framesize = %d\n", pkt.data, pkt.size); + return empty_im; + } + if (got_picture) + { + int bytes = avpicture_get_size(AV_PIX_FMT_RGB24, pVideoCodecCtx->width, pVideoCodecCtx->height); + uint8_t *buffer_rgb = (uint8_t *)av_malloc(bytes); + avpicture_fill((AVPicture *)pFrameRGB, buffer_rgb, AV_PIX_FMT_RGB24, pVideoCodecCtx->width, pVideoCodecCtx->height); + + img_convert_ctx = sws_getContext(pVideoCodecCtx->width, pVideoCodecCtx->height, pVideoCodecCtx->pix_fmt, + pVideoCodecCtx->width, pVideoCodecCtx->height, AV_PIX_FMT_BGR24, SWS_FAST_BILINEAR, NULL, NULL, NULL); + if (img_convert_ctx == NULL) + { + + printf("can't init convert context!\n"); + return empty_im; + } + sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0, pVideoCodecCtx->height, pFrameRGB->data, pFrameRGB->linesize); + pRgbImg = cvCreateImage(cvSize(pVideoCodecCtx->width, pVideoCodecCtx->height), 8, 3); + + memcpy(pRgbImg->imageData, buffer_rgb, pVideoCodecCtx->width * 3 * pVideoCodecCtx->height); + //image im = ipl_to_image(pRgbImg); + + mat = new cv::Mat(); + *mat = cvarrToMat(pRgbImg); + + //Img = cvarrToMat(pRgbImg); + cvReleaseImage(&pRgbImg); + sws_freeContext(img_convert_ctx); + av_free(buffer_rgb); + } + } + + //*(cv::Mat **)in_image = &Img; + *(cv::Mat **)in_image = mat; + + cv::Mat new_img = cv::Mat(h, w, CV_8UC(c)); + cv::resize(*mat, new_img, new_img.size(), 0, 0, 
cv::INTER_LINEAR); + cv::cvtColor(new_img, new_img, cv::COLOR_RGB2BGR); + image im = mat_to_image(new_img); + + return im; +} + + +#endif // FFMPEG + +#ifdef __cplusplus +} +#endif diff --git a/src/image_ffmpeg.h b/src/image_ffmpeg.h new file mode 100644 index 00000000000..48e574d80f6 --- /dev/null +++ b/src/image_ffmpeg.h @@ -0,0 +1,24 @@ +#ifndef FFMPEG_H +#define FFMPEG_H + +#include "image.h" +#include "matrix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef FFMPEG + +void close_stream(); +void av_pkt_unref(); +void open_video_stream(const char *filename); +image get_image_from_ffmpeg_stream_resize(mat_cv** in_image, int w, int h, int c); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif //FFMPEG_H diff --git a/src/stream.cpp b/src/stream.cpp index ff68d02d12b..3f54edf1c51 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -15,6 +15,10 @@ #include #endif +#ifdef FFMPEG +#include "image_ffmpeg.h" +#endif + #ifdef OPENCV #include "http_stream.h" @@ -69,6 +73,10 @@ static const int thread_wait_ms = 1; static volatile int run_fetch_in_thread = 0; static volatile int run_detect_in_thread = 0; +#ifdef FFMPEG +static int input_is_stream = 0; +#endif + class MovingAverage { int size; @@ -160,10 +168,16 @@ void *fetch_in_thread(void *ptr) this_thread_yield(); } int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream - if (letter_box) + if (letter_box){ in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); - else + }else{ +#ifdef FFMPEG + if (input_is_stream) in_s = get_image_from_ffmpeg_stream_resize(&in_img, net.w, net.h, net.c); + else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#else in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); +#endif + } if (!in_s.data) { printf("Stream closed.\n"); custom_atomic_store_int(&flag_exit, 1); @@ -257,6 +271,10 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in if(filename){ printf("video file: %s\n", filename); +#ifdef FFMPEG + open_video_stream(filename); + input_is_stream = 1; +#endif cap = get_capture_video_stream(filename); }else{ printf("Webcam index: %d\n", cam_index); @@ -534,7 +552,9 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in //ok = video_capture.read(read_frame); time_prev = time_stop; - +#ifdef FFMPEG + av_pkt_unref(); +#endif } printf("input video stream closed. 
\n"); if (output_video_writer) { From 658621fd8f9a3d46775d87b4809ee883ee01d989 Mon Sep 17 00:00:00 2001 From: edwardxliu <44568088+edwardxliu@users.noreply.github.com> Date: Tue, 8 Dec 2020 15:48:53 +0800 Subject: [PATCH 09/20] Update README.md --- README.md | 689 +----------------------------------------------------- 1 file changed, 2 insertions(+), 687 deletions(-) diff --git a/README.md b/README.md index f1e6ab4ec2a..85f86e8f376 100644 --- a/README.md +++ b/README.md @@ -1,687 +1,2 @@ -# Yolo v4, v3 and v2 for Windows and Linux - -## (neural networks for object detection) - -Paper Yolo v4: https://arxiv.org/abs/2004.10934 - -More details: [medium link](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) - -Manual: https://github.com/AlexeyAB/darknet/wiki - -Discussion: - - [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) - - [Google-groups](https://groups.google.com/forum/#!forum/darknet) - - [Discord](https://discord.gg/zSq8rtW) - -About Darknet framework: http://pjreddie.com/darknet/ - -[![Darknet Continuous Integration](https://github.com/AlexeyAB/darknet/workflows/Darknet%20Continuous%20Integration/badge.svg)](https://github.com/AlexeyAB/darknet/actions?query=workflow%3A%22Darknet+Continuous+Integration%22) -[![CircleCI](https://circleci.com/gh/AlexeyAB/darknet.svg?style=svg)](https://circleci.com/gh/AlexeyAB/darknet) -[![TravisCI](https://travis-ci.org/AlexeyAB/darknet.svg?branch=master)](https://travis-ci.org/AlexeyAB/darknet) -[![Contributors](https://img.shields.io/github/contributors/AlexeyAB/Darknet.svg)](https://github.com/AlexeyAB/darknet/graphs/contributors) -[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE) -[![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965) -[![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2004.10934-B31B1B.svg)](https://arxiv.org/abs/2004.10934) -[![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) -[![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) - - -* [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -* [Requirements (and how to install dependecies)](#requirements) -* [Pre-trained models](#pre-trained-models) -* [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) -* [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) -* [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) -* [Datasets](#datasets) - -0. [Improvements in this repository](#improvements-in-this-repository) -1. [How to use](#how-to-use-on-the-command-line) -2. How to compile on Linux - * [Using cmake](#how-to-compile-on-linux-using-cmake) - * [Using make](#how-to-compile-on-linux-using-make) -3. How to compile on Windows - * [Using cmake](#how-to-compile-on-windows-using-cmake) - * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg) - * [Legacy way](#how-to-compile-on-windows-legacy-way) -4. 
[Training and Evaluation of speed and accuracy on MS COCO](https://github.com/AlexeyAB/darknet/wiki#training-and-evaluation-of-speed-and-accuracy-on-ms-coco) -5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu) -6. [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) -7. [How to train tiny-yolo (to detect your custom objects)](#how-to-train-tiny-yolo-to-detect-your-custom-objects) -8. [When should I stop training](#when-should-i-stop-training) -9. [How to improve object detection](#how-to-improve-object-detection) -10. [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) -11. [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) - -![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) - -![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 - - -tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times for batch=4. -* tkDNN: https://github.com/ceccocats/tkDNN -* OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf - -#### GeForce RTX 2080 Ti: -| Network Size | Darknet, FPS (avg)| tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | -|:-----:|:--------:|--------:|--------:|--------:|--------:|--------:|------:| -|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | -|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | -|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | -|608 | 53 | 62 | 103 | **115**| 150 | **150** | **2.8x** | -|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | -|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | - -* Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) -* Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) -* CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks -* Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) -* Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf -* Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg -* Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg - -#### Youtube video of results - -[![Yolo v4](http://img.youtube.com/vi/1_SiUOYUoOI/0.jpg)](https://youtu.be/1_SiUOYUoOI "Yolo v4") - -Others: https://www.youtube.com/user/pjreddie/videos - -#### How to evaluate AP of YOLOv4 on the MS COCO evaluation server - -1. Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip -2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt -3. 
Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) -4. Content of the file `cfg/coco.data` should be - -```ini -classes= 80 -train = /trainvalno5k.txt -valid = /testdev2017.txt -names = data/coco.names -backup = backup -eval=coco -``` - -5. Create `/results/` folder near with `./darknet` executable file -6. Run validation: `./darknet detector valid cfg/coco.data cfg/yolov4.cfg yolov4.weights` -7. Rename the file `/results/coco_results.json` to `detections_test-dev2017_yolov4_results.json` and compress it to `detections_test-dev2017_yolov4_results.zip` -8. Submit file `detections_test-dev2017_yolov4_results.zip` to the MS COCO evaluation server for the `test-dev2019 (bbox)` - -#### How to evaluate FPS of YOLOv4 on GPU - -1. Compile Darknet with `GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=1` in the `Makefile` -2. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) -3. Get any .avi/.mp4 video file (preferably not more than 1920x1080 to avoid bottlenecks in CPU performance) -4. Run one of two commands and look at the AVG FPS: - -* include video_capturing + NMS + drawing_bboxes: - `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -dont_show -ext_output` -* exclude video_capturing + NMS + drawing_bboxes: - `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -benchmark` - -#### Pre-trained models - -There are weights-file for different cfg-files (trained for MS COCO dataset): - -FPS on RTX 2070 (R) and Tesla V100 (V): - -* [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) - just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: - * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops - * `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops - * `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops - * `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops - -* [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) - -* [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) - -* [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - 
OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights) - -
CLICK ME - Yolo v3 models - -* [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) - -* [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) - -* [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) - -* [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) - -* [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) - -* [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing) - -
- -
CLICK ME - Yolo v2 models - -* `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights -* `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights -* `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights -* `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights -* `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights - -
- -Put it near compiled: darknet.exe - -You can get cfg-files by path: `darknet/cfg/` - -### Requirements - -* Windows or Linux -* **CMake >= 3.12**: https://cmake.org/download/ -* **CUDA >= 10.0**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) -* **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) -* **cuDNN >= 7.0** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) -* **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported -* on Linux **GCC or Clang**, on Windows **MSVC 2017/2019** https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community - -#### Yolo v4 in other frameworks - -* **TensorFlow:** YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite - For YOLOv3 - convert `yolov3.weights`/`cfg` files to `yolov3.ckpt`/`pb/meta`: by using [mystic123](https://github.com/mystic123/tensorflow-yolo-v3) project, and [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) -* **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) -* **Intel OpenVINO 2020 R4:** (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) -* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn -* **PyTorch > ONNX**: - * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) - * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) - * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) -* **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ -* **TensorRT** YOLOv4 on TensorRT+tkDNN: https://github.com/ceccocats/tkDNN - For YOLOv3 (-70% faster inference): [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) read [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
[wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) implemented yolov3-spp, yolov4, etc. -* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream -* **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) -* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about -* **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite -* **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron - -#### Datasets - -* MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset -* OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset -* Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets -* ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set) -* German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use this parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task -* List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets - -### Improvements in this repository - -* developed State-of-the-Art object detector YOLOv4 -* added State-of-Art models: CSP, PRN, EfficientNet -* added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] -* added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video -* added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX -* added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) -* improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg -* improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm -* improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` -* improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... 
-* improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta -* improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) -* optimized memory allocation during network resizing when `random=1` -* optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 -* added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... -* added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training -* run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser -* added calculation of anchors for training -* added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp -* run-time tips and warnings if you use incorrect cfg-file or dataset -* added support for Windows -* many other fixes of code... - -And added manual - [How to train Yolo v4-v2 (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) - -Also, you might be interested in using a simplified repository where is implemented INT8-quantization (+30% speedup and -1% mAP reduced): https://github.com/AlexeyAB/yolo2_light - -#### How to use on the command line - -On Linux use `./darknet` instead of `darknet.exe`, like this:`./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` - -On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` - -* Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` -* **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` -* Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` -* Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` -* Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` -* Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` -* Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` -* **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` -* Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` -* Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` -* Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): - `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` -* 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data 
cfg/yolo9000.cfg yolo9000.weights` -* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app -* To process a list of images `data/train.txt` and save results of detection to `result.json` file use: - `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` -* To process a list of images `data/train.txt` and save results of detection to `result.txt` use: - `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` -* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: - `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` -* To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` -* To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` -* To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights -iou_thresh 0.75` - -##### For using network video-camera mjpeg-stream with any Android smartphone - -1. Download for Android phone mjpeg-stream soft: IP Webcam / Smart WebCam - - * Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2 - * IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam - -2. Connect your Android phone to computer by WiFi (through a WiFi-router) or USB -3. Start Smart WebCam on your phone -4. Replace the address below, on shown in the phone application (Smart WebCam) and launch: - -* Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0` - -### How to compile on Linux/macOS (using `CMake`) - -The `CMakeLists.txt` will attempt to find installed optional dependencies like CUDA, cudnn, ZED and build against those. It will also create a shared object library file to use `darknet` for code development. - -Open a shell terminal inside the cloned repository and launch: - -```bash -./build.sh -``` - -### How to compile on Linux (using `make`) - -Just do `make` in the darknet directory. 
(You can try to compile and run it on Google Colab in cloud [link](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) (press «Open in Playground» button at the top-left corner) and watch the video [link](https://www.youtube.com/watch?v=mKAEGSxwOAY) ) -Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1) - -* `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`) -* `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) -* `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x -* `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams -* `DEBUG=1` to bould debug version of Yolo -* `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU -* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp - or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` -* `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run - `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` -* You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a never version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card. - -To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` - -### How to compile on Windows (using `CMake`) - -This is the recommended approach to build Darknet on Windows. - -1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com) - -2. Install CUDA (at least v10.0) enabling VS Integration during installation. - -3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: - -```PowerShell -PS Code\> git clone https://github.com/microsoft/vcpkg -PS Code\> cd vcpkg -PS Code\vcpkg> $env:VCPKG_ROOT=$PWD -PS Code\vcpkg> .\bootstrap-vcpkg.bat -PS Code\vcpkg> .\vcpkg install darknet[full]:x64-windows #replace with darknet[opencv-base,cuda,cudnn]:x64-windows for a quicker install of dependencies -PS Code\vcpkg> cd .. -PS Code\> git clone https://github.com/AlexeyAB/darknet -PS Code\> cd darknet -PS Code\darknet> .\build.ps1 -``` - -## How to train with multi-GPU - -1. Train it first on 1 GPU for like 1000 iterations: `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg yolov4.conv.137` - -2. 
Then stop and by using partially-trained model `/backup/yolov4_1000.weights` run training with multigpu (up to 4 GPUs): `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg /backup/yolov4_1000.weights -gpus 0,1,2,3` - -If you get a Nan, then for some datasets better to decrease learning rate, for 4 GPUs set `learning_rate = 0,00065` (i.e. learning_rate = 0.00261 / GPUs). In this case also increase 4x times `burn_in =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. - -https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ - -## How to train (to detect your custom objects) - -(to train old Yolo v2 `yolov2-voc.cfg`, `yolov2-tiny-voc.cfg`, `yolo-voc.cfg`, `yolo-voc.2.0.cfg`, ... [click by the link](https://github.com/AlexeyAB/darknet/tree/47c7af1cea5bbdedf1184963355e6418cb8b1b4f#how-to-train-pascal-voc-data)) - -Training Yolo v4 (and v3): - -0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) - -1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg)` and: - -* change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3) -* change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) -* change line max_batches to (`classes*2000` but not less than number of training images, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes -* change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22) -* set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9 -* change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers: - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783 -* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. 
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776 -* when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604 - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696 - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789 - -So if `classes=1` then should be `filters=18`. If `classes=2` then write `filters=21`. - -**(Do not write in the cfg-file: filters=(classes + 5)x3)** - -(Generally `filters` depends on the `classes`, `coords` and number of `mask`s, i.e. filters=`(classes + coords + 1)*`, where `mask` is indices of anchors. If `mask` is absence, then filters=`(classes + coords + 1)*num`) - -So for example, for 2 objects, your file `yolo-obj.cfg` should differ from `yolov4-custom.cfg` in such lines in each of **3** [yolo]-layers: - -```ini -[convolutional] -filters=21 - -[region] -classes=2 -``` - -2. Create file `obj.names` in the directory `build\darknet\x64\data\`, with objects names - each in new line - -3. Create file `obj.data` in the directory `build\darknet\x64\data\`, containing (where **classes = number of objects**): - - ```ini - classes = 2 - train = data/train.txt - valid = data/test.txt - names = data/obj.names - backup = backup/ - ``` - -4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\` - -5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark - -It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: - -` ` - - Where: - * `` - integer object number from `0` to `(classes-1)` - * ` ` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` - * for example: ` = / ` or ` = / ` - * atention: ` ` - are center of rectangle (are not top-left corner) - - For example for `img1.jpg` you will be created `img1.txt` containing: - - ``` - 1 0.716797 0.395833 0.216406 0.147222 - 0 0.687109 0.379167 0.255469 0.158333 - 1 0.420312 0.395833 0.140625 0.166667 - ``` - -6. Create file `train.txt` in directory `build\darknet\x64\data\`, with filenames of your images, each filename in new line, with path relative to `darknet.exe`, for example containing: - - ``` - data/obj/img1.jpg - data/obj/img2.jpg - data/obj/img3.jpg - ``` - -7. 
Download pre-trained weights for the convolutional layers and put to the directory `build\darknet\x64` - * for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) - * for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29) - * for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing) - * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74) - * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing) - * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing) - - -8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137` - - To train on Linux use command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`) - - * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations) - * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations) - * (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2) - * (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser) - -8.1. For training with mAP (mean average precisions) calculation for each 4 Epochs (set `valid=valid.txt` or `train.txt` in `obj.data` file) and run: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` - -9. After training is complete - get result `yolo-obj_final.weights` from path `build\darknet\x64\backup\` - - * After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights` - - (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`) - - * Also you can get result earlier than all 45000 iterations. - - **Note:** If during training you see `nan` values for `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well. - - **Note:** If you changed width= or height= in your cfg-file, then new width and height must be divisible by 32. 
- - **Note:** After training use such command for detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - - **Note:** if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) - -### How to train tiny-yolo (to detect your custom objects): - -Do all the same steps as for the full yolo model as described above. With the exception of: -* Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 - (Or get this file from yolov4-tiny.weights file by using command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29` -* Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg` -* Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29` - -For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as showed in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd -If you made you custom model that isn't based on other models, then you can train it without pre-trained weights, then will be used random initial weights. - -## When should I stop training: - -Usually sufficient 2000 iterations for each class(object), but not less than number of training images and not less than 6000 iterations in total. But for a more precise definition when you should stop training, use the following manual: - -1. During training, you will see varying indicators of error, and you should stop when no longer decreases **0.XXXXXXX avg**: - - > Region Avg IOU: 0.798363, Class: 0.893232, Obj: 0.700808, No Obj: 0.004567, Avg Recall: 1.000000, count: 8 - > Region Avg IOU: 0.800677, Class: 0.892181, Obj: 0.701590, No Obj: 0.004574, Avg Recall: 1.000000, count: 8 - > - > **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images - > Loaded: 0.000000 seconds - - * **9002** - iteration number (number of batch) - * **0.60730 avg** - average loss (error) - **the lower, the better** - - When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). - - Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. - -2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: - -For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. 
You should get weights from **Early Stopping Point**: - -![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) - -To get weights from Early Stopping Point: - - 2.1. At first, in your file `obj.data` you must specify the path to the validation dataset `valid = valid.txt` (format of `valid.txt` as in `train.txt`), and if you haven't validation images, just copy `data\train.txt` to `data\valid.txt`. - - 2.2 If training is stopped after 9000 iterations, to validate some of previous weights use this commands: - -(If you use another GitHub repository, then use `darknet.exe detector recall`... instead of `darknet.exe detector map`...) - -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` - -And comapre last output lines for each weights (7000, 8000, 9000): - -Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) - -For example, **bigger mAP** gives weights `yolo-obj_8000.weights` - then **use this weights for detection**. - -Or just train with `-map` flag: - -`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` - -So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calculated for each 4 Epochs using `valid=valid.txt` file that is specified in `obj.data` file (`1 Epoch = images_in_train_txt / batch` iterations) - -(to change the max x-axis value - change [`max_batches=`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) parameter to `2000*classes`, f.e. `max_batches=6000` for 3 classes) - -![loss_chart_map_chart](https://hsto.org/webt/yd/vl/ag/ydvlagutof2zcnjodstgroen8ac.jpeg) - -Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - -* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 - -* **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf - -**mAP** is default metric of precision in the PascalVOC competition, **this is the same as AP50** metric in the MS COCO competition. -In terms of Wiki, indicators Precision and Recall have a slightly different meaning than in the PascalVOC competition, but **IoU always has the same meaning**. - -![precision_recall_iou](https://hsto.org/files/ca8/866/d76/ca8866d76fb840228940dbf442a7f06a.jpg) - - -### Custom object detection: - -Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - -| ![Yolo_v2_training](https://hsto.org/files/d12/1e7/515/d121e7515f6a4eb694913f10de5f2b61.jpg) | ![Yolo_v2_training](https://hsto.org/files/727/c7e/5e9/727c7e5e99bf4d4aa34027bb6a5e4bab.jpg) | -|---|---| - -## How to improve object detection: - -1. 
Before training: - -* set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788) - -* increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision - -* check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark - -* my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. - -* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more - -* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects - -* What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected. - -* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0,0615234375*(width*height)` where are width and height are parameters from `[net]` section in cfg-file) - -* for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895 - * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892 - * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989 - -* for training for both small and large objects use modified models: - * Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg - * Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg - * YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg - -* If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) 
then for disabling flip data augmentation - add `flip=0` here: https://github.com/AlexeyAB/darknet/blob/3d2d0a7c98dbc8923d9ff705b81ff4f7940ea6ff/cfg/yolov3.cfg#L17 - -* General rule - your training dataset should include such a set of relative sizes of objects that you want to detect: - * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` - * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` - - I.e. for each object from Test dataset there must be at least 1 object in the Training dataset with the same class_id and about the same relative size: - - `object width in percent from Training dataset` ~= `object width in percent from Test dataset` - - That is, if only objects that occupied 80-90% of the image were present in the training set, then the trained network will not be able to detect objects that occupy 1-10% of the image. - -* to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file - -* each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. - -* to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5. - -* Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file: -`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` -then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But you should change indexes of anchors `masks=` for each [yolo]-layer, so for YOLOv4 the 1st-[yolo]-layer has anchors smaller than 30x30, 2nd smaller than 60x60, 3rd remaining, and vice versa for YOLOv3. Also you should change the `filters=(classes + 5)*` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors. - -2. 
After training - for detection: - -* Increase network-resolution by set in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9) - -* it is not necessary to train the network again, just use `.weights`-file already trained for 416x416 resolution - -* to get even greater accuracy you should train with higher resolution 608x608 or 832x832, note: if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) - -## How to mark bounded boxes of objects and create annotation files: - -Here you can find repository with GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 - v4: https://github.com/AlexeyAB/Yolo_mark - -With example of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air`1-6`.txt`, `bird`1-4`.txt` for 2 classes of objects (air, bird) and `train_obj.cmd` with example how to train this image-set with Yolo v2 - v4 - -Different tools for marking objects in images: - -1. in C++: https://github.com/AlexeyAB/Yolo_mark -2. in Python: https://github.com/tzutalin/labelImg -3. in Python: https://github.com/Cartucho/OpenLabeling -4. in C++: https://www.ccoderun.ca/darkmark/ -5. in JavaScript: https://github.com/opencv/cvat -6. in C++: https://github.com/jveitchmichaelis/deeplabel -7. in C#: https://github.com/BMW-InnovationLab/BMW-Labeltool-Lite -8. DL-Annotator for Windows ($30): [url](https://www.microsoft.com/en-us/p/dlannotator/9nsx79m7t8fn?activetab=pivot:overviewtab) -9. v7labs - the greatest cloud labeling tool ($1.5 per hour): https://www.v7labs.com/ - -## How to use Yolo as DLL and SO libraries - -* on Linux - * using `build.sh` or - * build `darknet` using `cmake` or - * set `LIBSO=1` in the `Makefile` and do `make` -* on Windows - * using `build.ps1` or - * build `darknet` using `cmake` or - * compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution - -There are 2 APIs: - -* C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h - * Python examples using the C API: - * https://github.com/AlexeyAB/darknet/blob/master/darknet.py - * https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py - -* C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp - * C++ example that uses C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp - ----- - -1. To compile Yolo as C++ DLL-file `yolo_cpp_dll.dll` - open the solution `build\darknet\yolo_cpp_dll.sln`, set **x64** and **Release**, and do the: Build -> Build yolo_cpp_dll - * You should have installed **CUDA 10.0** - * To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;` - -2. 
To use Yolo as DLL-file in your C++ console application - open the solution `build\darknet\yolo_console_dll.sln`, set **x64** and **Release**, and do the: Build -> Build yolo_console_dll - - * you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe` - **use this command**: `yolo_console_dll.exe data/coco.names yolov4.cfg yolov4.weights test.mp4` - - * after launching your console application and entering the image file name - you will see info for each object: - ` ` - * to use simple OpenCV-GUI you should uncomment line `//#define OPENCV` in `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5) - * you can see source code of simple example for detection on the video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75) - -`yolo_cpp_dll.dll`-API: [link](https://github.com/AlexeyAB/darknet/blob/master/src/yolo_v2_class.hpp#L42) - -```cpp -struct bbox_t { - unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box - float prob; // confidence - probability that the object was found correctly - unsigned int obj_id; // class of object - from range [0, classes-1] - unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) - unsigned int frames_counter;// counter of frames on which the object was detected -}; - -class Detector { -public: - Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0); - ~Detector(); - - std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); - std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); - static image_t load_image(std::string image_filename); - static void free_image(image_t m); - -#ifdef OPENCV - std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false); - std::shared_ptr mat_to_image_resize(cv::Mat mat) const; -#endif -}; -``` +Fork from https://github.com/AlexeyAB/darknet/ +Trying to optimizie streaming latency of IP camera and add RTMP streaming function via FFMPEG From 91e3d7d35ff7cbef94954248d952c48b1c7e2f9a Mon Sep 17 00:00:00 2001 From: edwardxliu <44568088+edwardxliu@users.noreply.github.com> Date: Tue, 8 Dec 2020 15:49:20 +0800 Subject: [PATCH 10/20] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 85f86e8f376..5fe6108c582 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ Fork from https://github.com/AlexeyAB/darknet/ + + Trying to optimizie streaming latency of IP camera and add RTMP streaming function via FFMPEG From 3f2d3fa62f135230d4b6a2360686077d4b8032e8 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 14 Dec 2020 20:30:50 +0800 Subject: [PATCH 11/20] input rtsp stream in stream.cpp with ffmpeg --- Makefile | 2 +- src/stream.cpp | 29 ++++++++++++++++------------- src/streamer.cpp | 8 ++++---- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 886df70cf8a..7fb67797801 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ OPENMP=0 LIBSO=1 ZED_CAMERA=0 ZED_CAMERA_v2_8=0 -STREAM=0 +STREAM=1 FFMPEG=1 # set GPU=1 and CUDNN=1 to speedup on GPU diff --git a/src/stream.cpp b/src/stream.cpp index 3f54edf1c51..75ac96f1f9e 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -33,12 +33,6 @@ #include #include -using namespace streamer; -using time_point = 
std::chrono::high_resolution_clock::time_point; -using high_resolution_clock = std::chrono::high_resolution_clock; -using std::cerr; -using std::endl; - static char **demo_names; static image **demo_alphabet; static int demo_classes; @@ -77,6 +71,12 @@ static volatile int run_detect_in_thread = 0; static int input_is_stream = 0; #endif +using namespace streamer; +using time_point = std::chrono::high_resolution_clock::time_point; +using high_resolution_clock = std::chrono::high_resolution_clock; +using std::cerr; +using std::endl; + class MovingAverage { int size; @@ -148,7 +148,6 @@ void process_frame(mat_cv *mat_ptr, cv::Mat &out) } } - void stream_frame(Streamer &streamer, const cv::Mat &image) { streamer.stream_frame(image.data); @@ -241,6 +240,7 @@ double get_wall_time() } return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001; } + void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers, @@ -365,6 +365,7 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in int frame_counter = 0; int global_frame_counter = 0; + Streamer streamer; int src_frame_width = get_width_mat(det_img); int src_frame_height = get_height_mat(det_img); @@ -375,11 +376,12 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in fprintf(stderr, "Please input a valid stream address \n"); exit(1); } + if (!dst_frame_width) dst_frame_width = src_frame_width; if (!dst_frame_height) dst_frame_height = src_frame_height; if (!stream_bitrate) stream_bitrate = 500000; if (!stream_fps) stream_fps = src_fps; - //if (!stream_profile) stream_profile = "high444"; + if (!stream_profile) stream_profile = "high444"; if (!stream_gop_size) stream_gop_size = 10; StreamerConfig streamer_config(src_frame_width, src_frame_height, @@ -454,7 +456,8 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in if(!prefix){ if (!dont_show) { const int each_frame = max_val_cmp(1, avg_fps / 60); - if(global_frame_counter % each_frame == 0){ //show_image_mat(show_img, "Demo"); + if(global_frame_counter % each_frame == 0){ + //show_image_mat(show_img, "Demo"); process_frame(show_img, proc_frame); if(!filename){ stream_frame(streamer, proc_frame); @@ -541,6 +544,9 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in start_time = get_time_point(); } } +#ifdef FFMPEG + av_pkt_unref(); +#endif time_stop = clk.now(); elapsed_time = std::chrono::duration_cast>(time_stop - time_start); frame_time = std::chrono::duration_cast>(time_stop - time_prev); @@ -548,13 +554,10 @@ void stream(char *cfgfile, char *weightfile, float thresh, float hier_thresh, in streamed_frames++; moving_average.add_value(frame_time.count()); avg_frame_time = moving_average.get_average(); - add_delay(streamed_frames, stream_fps, elapsed_time.count(), avg_frame_time); + //add_delay(streamed_frames, stream_fps, elapsed_time.count(), avg_frame_time); //ok = video_capture.read(read_frame); time_prev = time_stop; -#ifdef FFMPEG - av_pkt_unref(); -#endif } printf("input video stream closed. 
\n"); if (output_video_writer) { diff --git a/src/streamer.cpp b/src/streamer.cpp index 615352ecb4c..0b4c9ffe5dc 100644 --- a/src/streamer.cpp +++ b/src/streamer.cpp @@ -98,10 +98,10 @@ static int set_options_and_open_encoder(AVFormatContext *fctx, AVStream *stream, av_dict_set(&codec_options, "preset", "ultrafast", 0); av_dict_set(&codec_options, "tune", "zerolatency", 0); av_dict_set(&codec_options, "crf", "30", 0); - //av_dict_set(&codec_options, "g", "1", 0); - //av_dict_set(&codec_options, "ar", "44100", 0); - //av_dict_set(&codec_options, "strict", "-2", 0); - //av_dict_set(&codec_options, "-ac", "1", 0); + av_dict_set(&codec_options, "g", "1", 0); + av_dict_set(&codec_options, "ar", "44100", 0); + av_dict_set(&codec_options, "strict", "-2", 0); + av_dict_set(&codec_options, "ac", "1", 0); av_dict_set(&codec_options, "q", "10", 0); // open video encoder From 50660eb3e39bac4acc673d353101c60259086270 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 14 Dec 2020 20:51:34 +0800 Subject: [PATCH 12/20] update --- README.md | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 README.md diff --git a/README.md b/README.md deleted file mode 100644 index 5fe6108c582..00000000000 --- a/README.md +++ /dev/null @@ -1,4 +0,0 @@ -Fork from https://github.com/AlexeyAB/darknet/ - - -Trying to optimizie streaming latency of IP camera and add RTMP streaming function via FFMPEG From 816210a93d496f49fc69e4d66badbfc92c8244a8 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Mon, 14 Dec 2020 21:06:07 +0800 Subject: [PATCH 13/20] update --- CMakeLists.txt | 4 +- Makefile | 9 +- README.md | 708 +++++++++++++ build/darknet/x64/cfg/cspx-p7-mish.cfg | 20 +- build/darknet/x64/cfg/yolov4-csp.cfg | 1277 ++++++++++++++++++++++++ build/darknet/x64/cfg/yolov4-tiny.cfg | 17 + build/darknet/x64/cfg/yolov4x-mish.cfg | 13 +- cfg/cspx-p7-mish.cfg | 20 +- cfg/yolov4-csp.cfg | 1277 ++++++++++++++++++++++++ cfg/yolov4x-mish.cfg | 13 +- include/darknet.h | 11 + src/convolutional_layer.c | 4 + src/darknet.c | 2 +- src/demo.c | 1 + src/detector.c | 12 +- src/layer.c | 3 + src/network.c | 184 +++- src/network.h | 5 + src/parser.c | 45 +- src/parser.h | 2 +- src/yolo_layer.c | 182 +++- src/yolo_layer.h | 2 +- 22 files changed, 3730 insertions(+), 81 deletions(-) create mode 100644 README.md create mode 100644 build/darknet/x64/cfg/yolov4-csp.cfg create mode 100644 cfg/yolov4-csp.cfg diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c7224e4e8b..ea7c4a64760 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,7 @@ include(CheckLanguage) check_language(CUDA) if(CMAKE_CUDA_COMPILER AND ENABLE_CUDA) set(CUDA_ARCHITECTURES "Auto" CACHE STRING "\"Auto\" detects local machine GPU compute arch at runtime, \"Common\" and \"All\" cover common and entire subsets of architectures, \"Names\" is a list of architectures to enable by name, \"Numbers\" is a list of compute capabilities (version number) to enable") - set_property(CACHE CUDA_ARCHITECTURES PROPERTY STRINGS "Auto" "Common" "All" "Kepler Maxwell Kepler+Tegra Maxwell+Tegra Pascal" "5.0 7.5") + set_property(CACHE CUDA_ARCHITECTURES PROPERTY STRINGS "Auto" "Common" "All" "Kepler Maxwell Kepler+Tegra Maxwell+Tegra Pascal" "5.0 7.5 8.6") enable_language(CUDA) if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "9.0") message(STATUS "Unsupported CUDA version, please upgrade to CUDA 9+. 
Disabling CUDA support") @@ -71,7 +71,7 @@ if(CMAKE_CUDA_COMPILER AND ENABLE_CUDA) find_package(CUDA REQUIRED) cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_ARCHITECTURES}) message(STATUS "Building with CUDA flags: " "${CUDA_ARCH_FLAGS}") - if (NOT "arch=compute_70,code=sm_70" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_72,code=sm_72" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_75,code=sm_75" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_80,code=sm_80" IN_LIST CUDA_ARCH_FLAGS) + if (NOT "arch=compute_70,code=sm_70" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_72,code=sm_72" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_75,code=sm_75" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_80,code=sm_80" IN_LIST CUDA_ARCH_FLAGS AND NOT "arch=compute_86,code=sm_86" IN_LIST CUDA_ARCH_FLAGS) set(ENABLE_CUDNN_HALF "FALSE" CACHE BOOL "Enable CUDNN Half precision" FORCE) message(STATUS "Your setup does not supports half precision (it requires CC >= 7.0)") else() diff --git a/Makefile b/Makefile index 7fb67797801..743404c1ce2 100644 --- a/Makefile +++ b/Makefile @@ -19,14 +19,19 @@ FFMPEG=1 USE_CPP=0 DEBUG=1 -ARCH= -gencode arch=compute_30,code=sm_30 \ - -gencode arch=compute_35,code=sm_35 \ +ARCH= -gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_50,code=[sm_50,compute_50] \ -gencode arch=compute_52,code=[sm_52,compute_52] \ -gencode arch=compute_61,code=[sm_61,compute_61] OS := $(shell uname) +# GeForce RTX 3070, 3080, 3090 +# ARCH= -gencode arch=compute_86,code=[sm_86,compute_86] + +# Kepler GeForce GTX 770, GTX 760, GT 740 +# ARCH= -gencode arch=compute_30,code=sm_30 + # Tesla A100 (GA100), DGX-A100, RTX 3080 # ARCH= -gencode arch=compute_80,code=[sm_80,compute_80] diff --git a/README.md b/README.md new file mode 100644 index 00000000000..203f77c7591 --- /dev/null +++ b/README.md @@ -0,0 +1,708 @@ +# Yolo v4, v3 and v2 for Windows and Linux + +## (neural networks for object detection) + +Paper YOLO v4: https://arxiv.org/abs/2004.10934 + +Paper Scaled YOLO v4: https://arxiv.org/abs/2011.08036 use to reproduce results: [ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4) + +More details in articles on medium: + * [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) + * [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) + +Manual: https://github.com/AlexeyAB/darknet/wiki + +Discussion: + - [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) + - [Google-groups](https://groups.google.com/forum/#!forum/darknet) + - [Discord](https://discord.gg/zSq8rtW) + +About Darknet framework: http://pjreddie.com/darknet/ + +[![Darknet Continuous Integration](https://github.com/AlexeyAB/darknet/workflows/Darknet%20Continuous%20Integration/badge.svg)](https://github.com/AlexeyAB/darknet/actions?query=workflow%3A%22Darknet+Continuous+Integration%22) +[![CircleCI](https://circleci.com/gh/AlexeyAB/darknet.svg?style=svg)](https://circleci.com/gh/AlexeyAB/darknet) +[![TravisCI](https://travis-ci.org/AlexeyAB/darknet.svg?branch=master)](https://travis-ci.org/AlexeyAB/darknet) +[![Contributors](https://img.shields.io/github/contributors/AlexeyAB/Darknet.svg)](https://github.com/AlexeyAB/darknet/graphs/contributors) +[![License: 
Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE) +[![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965) +[![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2004.10934-B31B1B.svg)](https://arxiv.org/abs/2004.10934) +[![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) +[![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) + + +* [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) +* [Requirements (and how to install dependecies)](#requirements) +* [Pre-trained models](#pre-trained-models) +* [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) +* [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) +* [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) +* [Datasets](#datasets) + +0. [Improvements in this repository](#improvements-in-this-repository) +1. [How to use](#how-to-use-on-the-command-line) +2. How to compile on Linux + * [Using cmake](#how-to-compile-on-linux-using-cmake) + * [Using make](#how-to-compile-on-linux-using-make) +3. How to compile on Windows + * [Using cmake](#how-to-compile-on-windows-using-cmake) + * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg) + * [Legacy way](#how-to-compile-on-windows-legacy-way) +4. [Training and Evaluation of speed and accuracy on MS COCO](https://github.com/AlexeyAB/darknet/wiki#training-and-evaluation-of-speed-and-accuracy-on-ms-coco) +5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu) +6. [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) +7. [How to train tiny-yolo (to detect your custom objects)](#how-to-train-tiny-yolo-to-detect-your-custom-objects) +8. [When should I stop training](#when-should-i-stop-training) +9. [How to improve object detection](#how-to-improve-object-detection) +10. [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) +11. [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) + +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/101356322-f1f5a180-38a8-11eb-9907-4fe4f188d887.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 + + +tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times for batch=4. 
+* tkDNN: https://github.com/ceccocats/tkDNN +* OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf + +#### GeForce RTX 2080 Ti: +| Network Size | Darknet, FPS (avg)| tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | +|:-----:|:--------:|--------:|--------:|--------:|--------:|--------:|------:| +|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | +|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | +|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | +|608 | 53 | 62 | 103 | **115**| 150 | **150** | **2.8x** | +|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | +|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | + +* Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) +* Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) +* CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks +* Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) +* Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf +* Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg +* Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg + +#### Youtube video of results + +| [![Yolo v4](https://user-images.githubusercontent.com/4096485/101360000-1a33cf00-38ae-11eb-9e5e-b29c5fb0afbe.png)](https://youtu.be/1_SiUOYUoOI "Yolo v4") | [![Scaled Yolo v4](https://user-images.githubusercontent.com/4096485/101359389-43a02b00-38ad-11eb-866c-f813e96bf61a.png)](https://youtu.be/YDFf-TqJOFE "Scaled Yolo v4") | +|---|---| + +Others: https://www.youtube.com/user/pjreddie/videos + +#### How to evaluate AP of YOLOv4 on the MS COCO evaluation server + +1. Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip +2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt +3. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) +4. Content of the file `cfg/coco.data` should be + +```ini +classes= 80 +train = /trainvalno5k.txt +valid = /testdev2017.txt +names = data/coco.names +backup = backup +eval=coco +``` + +5. Create `/results/` folder near with `./darknet` executable file +6. Run validation: `./darknet detector valid cfg/coco.data cfg/yolov4.cfg yolov4.weights` +7. Rename the file `/results/coco_results.json` to `detections_test-dev2017_yolov4_results.json` and compress it to `detections_test-dev2017_yolov4_results.zip` +8. Submit file `detections_test-dev2017_yolov4_results.zip` to the MS COCO evaluation server for the `test-dev2019 (bbox)` + +#### How to evaluate FPS of YOLOv4 on GPU + +1. 
Compile Darknet with `GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=1` in the `Makefile` +2. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) +3. Get any .avi/.mp4 video file (preferably not more than 1920x1080 to avoid bottlenecks in CPU performance) +4. Run one of two commands and look at the AVG FPS: + +* include video_capturing + NMS + drawing_bboxes: + `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -dont_show -ext_output` +* exclude video_capturing + NMS + drawing_bboxes: + `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -benchmark` + +#### Pre-trained models + +There are weights-file for different cfg-files (trained for MS COCO dataset): + +FPS on RTX 2070 (R) and Tesla V100 (V): + +* [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) + * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 + +* [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) + + just change `width=` and `height=` parameters in `yolov4-csp.cfg` file and use the same `yolov4-csp.weights` file for all cases: + * `width=608 height=608` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops + * `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops + * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 + +* [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) + just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: + * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops + * `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops + * `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops + * `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops + +* [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) + +* [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) 
FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) + +* [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights) + +
CLICK ME - Yolo v3 models + +* [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) + +* [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) + +* [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) + +* [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) + +* [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) + +* [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing) + +
+ +
CLICK ME - Yolo v2 models + +* `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights +* `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights +* `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights +* `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights +* `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights + +
+ +Put it near compiled: darknet.exe + +You can get cfg-files by path: `darknet/cfg/` + +### Requirements + +* Windows or Linux +* **CMake >= 3.12**: https://cmake.org/download/ +* **CUDA >= 10.0**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) +* **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) +* **cuDNN >= 7.0** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) +* **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported +* on Linux **GCC or Clang**, on Windows **MSVC 2017/2019** https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community + +#### Yolo v4 in other frameworks + +* **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 +* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite + For YOLOv3 - convert `yolov3.weights`/`cfg` files to `yolov3.ckpt`/`pb/meta`: by using [mystic123](https://github.com/mystic123/tensorflow-yolo-v3) project, and [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) +* **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) +* **Intel OpenVINO 2020 R4:** (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) +* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn +* **PyTorch > ONNX**: + * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) + * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) + * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) + * [YOLOv5](https://github.com/ultralytics/yolov5) +* **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ +* **TensorRT** YOLOv4 on TensorRT+tkDNN: https://github.com/ceccocats/tkDNN + For YOLOv3 (-70% faster inference): [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) read 
[PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) implemented yolov3-spp, yolov4, etc. +* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream +* **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) +* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about +* **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite +* **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron + +#### Datasets + +* MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset +* OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset +* Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets +* ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set) +* German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use this parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task +* List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets + +### Improvements in this repository + +* developed State-of-the-Art object detector YOLOv4 +* added State-of-Art models: CSP, PRN, EfficientNet +* added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] +* added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video +* added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX +* added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) +* improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg +* improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm +* improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` +* improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... 
+* improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta +* improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) +* optimized memory allocation during network resizing when `random=1` +* optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 +* added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... +* added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training +* run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser +* added calculation of anchors for training +* added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +* run-time tips and warnings if you use incorrect cfg-file or dataset +* added support for Windows +* many other fixes of code... + +And added manual - [How to train Yolo v4-v2 (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) + +Also, you might be interested in using a simplified repository where is implemented INT8-quantization (+30% speedup and -1% mAP reduced): https://github.com/AlexeyAB/yolo2_light + +#### How to use on the command line + +On Linux use `./darknet` instead of `darknet.exe`, like this:`./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` + +On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` + +* Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` +* **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` +* Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` +* Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` +* Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` +* Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` +* Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` +* **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` +* Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` +* Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` +* Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): + `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` +* 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data 
cfg/yolo9000.cfg yolo9000.weights` +* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app +* To process a list of images `data/train.txt` and save results of detection to `result.json` file use: + `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` +* To process a list of images `data/train.txt` and save results of detection to `result.txt` use: + `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` +* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: + `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` +* To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` +* To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` +* To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights -iou_thresh 0.75` + +##### For using network video-camera mjpeg-stream with any Android smartphone + +1. Download for Android phone mjpeg-stream soft: IP Webcam / Smart WebCam + + * Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2 + * IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam + +2. Connect your Android phone to computer by WiFi (through a WiFi-router) or USB +3. Start Smart WebCam on your phone +4. Replace the address below, on shown in the phone application (Smart WebCam) and launch: + +* Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0` + +### How to compile on Linux/macOS (using `CMake`) + +The `CMakeLists.txt` will attempt to find installed optional dependencies like CUDA, cudnn, ZED and build against those. It will also create a shared object library file to use `darknet` for code development. + +Open a shell terminal inside the cloned repository and launch: + +```bash +./build.sh +``` + +### How to compile on Linux (using `make`) + +Just do `make` in the darknet directory. 
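+For example, one possible build invocation looks like the sketch below (a sketch only; `make` command-line variables override the defaults at the top of the `Makefile`, or you can edit the file directly - the options themselves are explained in the list that follows):
+
+```bash
+# assumption: run from the darknet repository root; enable only the options you need
+make clean
+make -j"$(nproc)" GPU=1 CUDNN=1 OPENCV=1 LIBSO=1
+```
+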
(You can try to compile and run it on Google Colab in cloud [link](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) (press «Open in Playground» button at the top-left corner) and watch the video [link](https://www.youtube.com/watch?v=mKAEGSxwOAY) ) +Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1) + +* `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`) +* `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) +* `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x +* `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams +* `DEBUG=1` to bould debug version of Yolo +* `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU +* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp + or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` +* `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run + `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` +* You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a never version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card. + +To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` + +### How to compile on Windows (using `CMake`) + +This is the recommended approach to build Darknet on Windows. + +1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com) + +2. Install CUDA (at least v10.0) enabling VS Integration during installation. + +3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: + +```PowerShell +PS Code\> git clone https://github.com/microsoft/vcpkg +PS Code\> cd vcpkg +PS Code\vcpkg> $env:VCPKG_ROOT=$PWD +PS Code\vcpkg> .\bootstrap-vcpkg.bat +PS Code\vcpkg> .\vcpkg install darknet[full]:x64-windows #replace with darknet[opencv-base,cuda,cudnn]:x64-windows for a quicker install of dependencies +PS Code\vcpkg> cd .. +PS Code\> git clone https://github.com/AlexeyAB/darknet +PS Code\> cd darknet +PS Code\darknet> .\build.ps1 +``` + +## How to train with multi-GPU + +1. Train it first on 1 GPU for like 1000 iterations: `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg yolov4.conv.137` + +2. 
Then stop and by using partially-trained model `/backup/yolov4_1000.weights` run training with multigpu (up to 4 GPUs): `darknet.exe detector train cfg/coco.data cfg/yolov4.cfg /backup/yolov4_1000.weights -gpus 0,1,2,3` + +If you get a Nan, then for some datasets better to decrease learning rate, for 4 GPUs set `learning_rate = 0,00065` (i.e. learning_rate = 0.00261 / GPUs). In this case also increase 4x times `burn_in =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. + +https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ + +## How to train (to detect your custom objects) + +(to train old Yolo v2 `yolov2-voc.cfg`, `yolov2-tiny-voc.cfg`, `yolo-voc.cfg`, `yolo-voc.2.0.cfg`, ... [click by the link](https://github.com/AlexeyAB/darknet/tree/47c7af1cea5bbdedf1184963355e6418cb8b1b4f#how-to-train-pascal-voc-data)) + +Training Yolo v4 (and v3): + +0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) + +1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg)` and: + +* change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3) +* change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) +* change line max_batches to (`classes*2000` but not less than number of training images, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes +* change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22) +* set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9 +* change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers: + * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610 + * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696 + * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783 +* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. 
+  * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603
+  * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689
+  * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776
+* when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer
+  * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604
+  * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696
+  * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789
+
+So if `classes=1` then it should be `filters=18`. If `classes=2` then write `filters=21`.
+
+**(Do not write in the cfg-file: filters=(classes + 5)x3)**
+
+(Generally `filters` depends on the `classes`, `coords` and number of `mask`s, i.e. filters=`(classes + coords + 1)*<number of mask>`, where `mask` is indices of anchors. If `mask` is absent, then filters=`(classes + coords + 1)*num`)
+
+So for example, for 2 objects, your file `yolo-obj.cfg` should differ from `yolov4-custom.cfg` in such lines in each of **3** [yolo]-layers:
+
+```ini
+[convolutional]
+filters=21
+
+[region]
+classes=2
+```
+
+2. Create file `obj.names` in the directory `build\darknet\x64\data\`, with object names - each in a new line
+
+3. Create file `obj.data` in the directory `build\darknet\x64\data\`, containing (where **classes = number of objects**):
+
+  ```ini
+  classes = 2
+  train = data/train.txt
+  valid = data/test.txt
+  names = data/obj.names
+  backup = backup/
+  ```
+
+4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\`
+
+5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark
+
+It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line:
+
+`<object-class> <x_center> <y_center> <width> <height>`
+
+  Where:
+  * `<object-class>` - integer object number from `0` to `(classes-1)`
+  * `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]`
+  * for example: `<x_center> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>`
+  * attention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner)
+
+  For example, for `img1.jpg` a file `img1.txt` will be created, containing:
+
+  ```
+  1 0.716797 0.395833 0.216406 0.147222
+  0 0.687109 0.379167 0.255469 0.158333
+  1 0.420312 0.395833 0.140625 0.166667
+  ```
+
+6. Create file `train.txt` in directory `build\darknet\x64\data\`, with filenames of your images, each filename in new line, with path relative to `darknet.exe`, for example containing:
+
+  ```
+  data/obj/img1.jpg
+  data/obj/img2.jpg
+  data/obj/img3.jpg
+  ```
+
+7. 
Download pre-trained weights for the convolutional layers and put to the directory `build\darknet\x64` + * for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) + * for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29) + * for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing) + * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74) + * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing) + * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing) + + +8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137` + + To train on Linux use command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`) + + * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations) + * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations) + * (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2) + * (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser) + +8.1. For training with mAP (mean average precisions) calculation for each 4 Epochs (set `valid=valid.txt` or `train.txt` in `obj.data` file) and run: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` + +9. After training is complete - get result `yolo-obj_final.weights` from path `build\darknet\x64\backup\` + + * After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights` + + (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`) + + * Also you can get result earlier than all 45000 iterations. + + **Note:** If during training you see `nan` values for `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well. + + **Note:** If you changed width= or height= in your cfg-file, then new width and height must be divisible by 32. 
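+   For a quick sanity check of the cfg arithmetic from step 1 and the notes above, here is a small sketch (the rules follow this README; `classes=2` is just an example value):
+
+   ```bash
+   classes=2
+   filters=$(( (classes + 5) * 3 ))      # filters= in the [convolutional] before each [yolo] layer
+   max_batches=$(( classes * 2000 )); [ "$max_batches" -lt 6000 ] && max_batches=6000
+   steps="$(( max_batches * 80 / 100 )),$(( max_batches * 90 / 100 ))"
+   width=416; height=416                 # network size must stay a multiple of 32
+   echo "filters=$filters max_batches=$max_batches steps=$steps width=$width height=$height"
+   ```
+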
+ + **Note:** After training use such command for detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` + + **Note:** if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) + +### How to train tiny-yolo (to detect your custom objects): + +Do all the same steps as for the full yolo model as described above. With the exception of: +* Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 + (Or get this file from yolov4-tiny.weights file by using command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29` +* Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg` +* Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29` + +For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as showed in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd +If you made you custom model that isn't based on other models, then you can train it without pre-trained weights, then will be used random initial weights. + +## When should I stop training: + +Usually sufficient 2000 iterations for each class(object), but not less than number of training images and not less than 6000 iterations in total. But for a more precise definition when you should stop training, use the following manual: + +1. During training, you will see varying indicators of error, and you should stop when no longer decreases **0.XXXXXXX avg**: + + > Region Avg IOU: 0.798363, Class: 0.893232, Obj: 0.700808, No Obj: 0.004567, Avg Recall: 1.000000, count: 8 + > Region Avg IOU: 0.800677, Class: 0.892181, Obj: 0.701590, No Obj: 0.004574, Avg Recall: 1.000000, count: 8 + > + > **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images + > Loaded: 0.000000 seconds + + * **9002** - iteration number (number of batch) + * **0.60730 avg** - average loss (error) - **the lower, the better** + + When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). + + Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. + +2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: + +For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. 
You should get weights from **Early Stopping Point**: + +![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) + +To get weights from Early Stopping Point: + + 2.1. At first, in your file `obj.data` you must specify the path to the validation dataset `valid = valid.txt` (format of `valid.txt` as in `train.txt`), and if you haven't validation images, just copy `data\train.txt` to `data\valid.txt`. + + 2.2 If training is stopped after 9000 iterations, to validate some of previous weights use this commands: + +(If you use another GitHub repository, then use `darknet.exe detector recall`... instead of `darknet.exe detector map`...) + +* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` +* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` +* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` + +And comapre last output lines for each weights (7000, 8000, 9000): + +Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) + +For example, **bigger mAP** gives weights `yolo-obj_8000.weights` - then **use this weights for detection**. + +Or just train with `-map` flag: + +`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` + +So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calculated for each 4 Epochs using `valid=valid.txt` file that is specified in `obj.data` file (`1 Epoch = images_in_train_txt / batch` iterations) + +(to change the max x-axis value - change [`max_batches=`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) parameter to `2000*classes`, f.e. `max_batches=6000` for 3 classes) + +![loss_chart_map_chart](https://hsto.org/webt/yd/vl/ag/ydvlagutof2zcnjodstgroen8ac.jpeg) + +Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` + +* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 + +* **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf + +**mAP** is default metric of precision in the PascalVOC competition, **this is the same as AP50** metric in the MS COCO competition. +In terms of Wiki, indicators Precision and Recall have a slightly different meaning than in the PascalVOC competition, but **IoU always has the same meaning**. + +![precision_recall_iou](https://hsto.org/files/ca8/866/d76/ca8866d76fb840228940dbf442a7f06a.jpg) + + +### Custom object detection: + +Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` + +| ![Yolo_v2_training](https://hsto.org/files/d12/1e7/515/d121e7515f6a4eb694913f10de5f2b61.jpg) | ![Yolo_v2_training](https://hsto.org/files/727/c7e/5e9/727c7e5e99bf4d4aa34027bb6a5e4bab.jpg) | +|---|---| + +## How to improve object detection: + +1. 
Before training: + +* set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788) + +* increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision + +* check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark + +* my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. + +* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more + +* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects + +* What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected. + +* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0,0615234375*(width*height)` where are width and height are parameters from `[net]` section in cfg-file) + +* for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895 + * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892 + * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989 + +* for training for both small and large objects use modified models: + * Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg + * Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg + * YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg + +* If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) 
* General rule - your training dataset should include such a set of relative sizes of objects that you want to detect: + * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` + * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` + + I.e. for each object from the Test dataset there must be at least 1 object in the Training dataset with the same class_id and about the same relative size: + + `object width in percent from Training dataset` ~= `object width in percent from Test dataset` + + That is, if only objects that occupied 80-90% of the image were present in the training set, then the trained network will not be able to detect objects that occupy 1-10% of the image. + +* to speed up training (at the cost of detection accuracy) set the parameter `stopbackward=1` for layer-136 in the cfg-file + +* each: `model of object, side, illumination, scale, each 30 degrees` of turn and inclination angle - these are *different objects* from the internal perspective of the neural network. So the more *different objects* you want to detect, the more complex the network model you should use. + +* to make the detected bounded boxes more accurate, you can add the 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train; this will increase mAP@0.9, but decrease mAP@0.5. + +* Only if you are an **expert** in neural detection networks - recalculate the anchors for your dataset for the `width` and `height` from the cfg-file: +`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` +then set the same 9 `anchors` in each of the 3 `[yolo]`-layers in your cfg-file. But you should change the indexes of anchors `masks=` for each [yolo]-layer, so that for YOLOv4 the 1st [yolo]-layer has anchors smaller than 30x30, the 2nd smaller than 60x60, and the 3rd the remaining ones (and vice versa for YOLOv3). Also you should change the `filters=(classes + 5)*` before each [yolo]-layer (see the sketch below). If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors.
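A minimal stand-alone sketch of that mask/filters bookkeeping (the anchor values and `classes=3` below are hypothetical, and the 30x30/60x60 split follows the YOLOv4 rule stated above; with the usual 3 masks per [yolo]-layer this reproduces the familiar `filters=(classes + 5)*3` value):

```cpp
#include <cstdio>

int main() {
    // Hypothetical output of `darknet.exe detector calc_anchors` for some
    // custom dataset: 9 (w,h) anchor pairs sorted from smallest to largest.
    int anchors[9][2] = { {8,11},  {14,20},   {22,26},
                          {35,48}, {44,58},   {52,41},
                          {90,120},{150,180}, {300,260} };
    int classes = 3;                  // example custom dataset with 3 classes
    int masks_per_layer[3] = {0, 0, 0};

    // YOLOv4 ordering: the 1st [yolo]-layer gets anchors smaller than 30x30,
    // the 2nd those smaller than 60x60, the 3rd the rest (reverse for YOLOv3).
    for (int i = 0; i < 9; ++i) {
        int w = anchors[i][0], h = anchors[i][1];
        int layer = (w < 30 && h < 30) ? 0 : (w < 60 && h < 60) ? 1 : 2;
        ++masks_per_layer[layer];
        printf("anchor %d (%dx%d) -> masks= of [yolo]-layer %d\n", i, w, h, layer + 1);
    }

    // filters= in the [convolutional] layer immediately before each [yolo]-layer
    for (int l = 0; l < 3; ++l)
        printf("[yolo]-layer %d: %d masks -> filters=(classes + 5)*%d = %d\n",
               l + 1, masks_per_layer[l], masks_per_layer[l],
               (classes + 5) * masks_per_layer[l]);
    return 0;
}
```

With the even 3/3/3 split produced by these example anchors, each [yolo]-layer keeps 3 masks, so `filters=(3 + 5)*3 = 24` for 3 classes.

2. 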
After training - for detection: + +* Increase the network resolution by setting in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9) + +* it is not necessary to train the network again - just use a `.weights`-file already trained for 416x416 resolution + +* to get even greater accuracy you should train with a higher resolution, 608x608 or 832x832; note: if the error `Out of memory` occurs, then in the `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) + +## How to mark bounded boxes of objects and create annotation files: + +Here you can find a repository with GUI software for marking bounded boxes of objects and generating annotation files for Yolo v2 - v4: https://github.com/AlexeyAB/Yolo_mark + +With examples of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air1-6.txt`, `bird1-4.txt` for 2 classes of objects (air, bird) and `train_obj.cmd` with an example of how to train this image-set with Yolo v2 - v4 + +Different tools for marking objects in images: + +1. in C++: https://github.com/AlexeyAB/Yolo_mark +2. in Python: https://github.com/tzutalin/labelImg +3. in Python: https://github.com/Cartucho/OpenLabeling +4. in C++: https://www.ccoderun.ca/darkmark/ +5. in JavaScript: https://github.com/opencv/cvat +6. in C++: https://github.com/jveitchmichaelis/deeplabel +7. in C#: https://github.com/BMW-InnovationLab/BMW-Labeltool-Lite +8. DL-Annotator for Windows ($30): [url](https://www.microsoft.com/en-us/p/dlannotator/9nsx79m7t8fn?activetab=pivot:overviewtab) +9. v7labs - cloud labeling tool ($1.5 per hour): https://www.v7labs.com/ + +## How to use Yolo as DLL and SO libraries + +* on Linux + * using `build.sh` or + * build `darknet` using `cmake` or + * set `LIBSO=1` in the `Makefile` and do `make` +* on Windows + * using `build.ps1` or + * build `darknet` using `cmake` or + * compile the `build\darknet\yolo_cpp_dll.sln` solution or the `build\darknet\yolo_cpp_dll_no_gpu.sln` solution + +There are 2 APIs: + +* C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h + * Python examples using the C API: + * https://github.com/AlexeyAB/darknet/blob/master/darknet.py + * https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py + +* C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp + * C++ example that uses the C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
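As a quick orientation before the build steps, here is a minimal sketch of calling the C++ API (it uses only the `Detector` class from `include/yolo_v2_class.hpp`, whose declaration is quoted further below; the cfg/weights/image file names are placeholders):

```cpp
#include <iostream>
#include <vector>
#include "yolo_v2_class.hpp"   // C++ API of the darknet DLL/SO (adjust the include path to your build)

int main() {
    // cfg/weights/image paths are placeholders - replace with your own files
    Detector detector("yolo-obj.cfg", "yolo-obj_8000.weights" /*, gpu_id = 0 */);

    std::vector<bbox_t> result = detector.detect("test.jpg", 0.2f /* thresh */);

    for (const bbox_t& b : result) {
        std::cout << "obj_id = " << b.obj_id << "  prob = " << b.prob
                  << "  box = " << b.x << "," << b.y
                  << " " << b.w << "x" << b.h << "\n";
    }
    return 0;
}
```

 + +---- + +1. To compile Yolo as a C++ DLL-file `yolo_cpp_dll.dll` - open the solution `build\darknet\yolo_cpp_dll.sln`, set **x64** and **Release**, and do: Build -> Build yolo_cpp_dll + * You should have installed **CUDA 10.0** + * To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;` + +2. 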
To use Yolo as a DLL-file in your C++ console application - open the solution `build\darknet\yolo_console_dll.sln`, set **x64** and **Release**, and do: Build -> Build yolo_console_dll + + * you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe` + **use this command**: `yolo_console_dll.exe data/coco.names yolov4.cfg yolov4.weights test.mp4` + + * after launching your console application and entering the image file name - you will see info for each detected object + * to use the simple OpenCV-GUI you should uncomment the line `//#define OPENCV` in the `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5) + * you can see the source code of a simple example of detection on a video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75) + +`yolo_cpp_dll.dll`-API: [link](https://github.com/AlexeyAB/darknet/blob/master/src/yolo_v2_class.hpp#L42) +
```cpp
struct bbox_t {
    unsigned int x, y, w, h;       // (x,y) - top-left corner, (w, h) - width & height of bounded box
    float prob;                    // confidence - probability that the object was found correctly
    unsigned int obj_id;           // class of object - from range [0, classes-1]
    unsigned int track_id;         // tracking id for video (0 - untracked, 1 - inf - tracked object)
    unsigned int frames_counter;   // counter of frames on which the object was detected
};

class Detector {
public:
    Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
    ~Detector();

    std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
    std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
    static image_t load_image(std::string image_filename);
    static void free_image(image_t m);

#ifdef OPENCV
    std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false);
    std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const;
#endif
};
```
diff --git a/build/darknet/x64/cfg/cspx-p7-mish.cfg b/build/darknet/x64/cfg/cspx-p7-mish.cfg index b5713391bcc..01be7283680 100644 --- a/build/darknet/x64/cfg/cspx-p7-mish.cfg +++ b/build/darknet/x64/cfg/cspx-p7-mish.cfg @@ -2340,7 +2340,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] @@ -2352,7 +2352,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2400,7 +2400,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 4,5,6,7 @@ -2411,7 +2411,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2459,7 +2459,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 8,9,10,11 @@ -2470,7 +2470,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2518,7 +2518,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 12,13,14,15 @@ -2529,7 +2529,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2577,7 +2577,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 16,17,18,19 @@ -2588,7 +2588,7 @@ ignore_thresh = .7 truth_thresh 
= 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 diff --git a/build/darknet/x64/cfg/yolov4-csp.cfg b/build/darknet/x64/cfg/yolov4-csp.cfg new file mode 100644 index 00000000000..23ebfd7461d --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-csp.cfg @@ -0,0 +1,1277 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#optimized_memory=1 + +#23:104x104 54:52x52 85:26x26 104:13x13 for 416 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -2 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -1,-7 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish 
+ +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=0 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=4.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -20 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -49 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/build/darknet/x64/cfg/yolov4-tiny.cfg b/build/darknet/x64/cfg/yolov4-tiny.cfg index dc6f5bfb8c6..821a34bff72 100644 --- a/build/darknet/x64/cfg/yolov4-tiny.cfg +++ b/build/darknet/x64/cfg/yolov4-tiny.cfg @@ -17,11 +17,24 @@ hue=.1 learning_rate=0.00261 burn_in=1000 + max_batches = 500200 policy=steps steps=400000,450000 scales=.1,.1 +#max_batches = 501000 +#policy=steps +#steps=500000 +#scales=0.01 + +#weights_reject_freq=1001 +#ema_alpha=0.998 +#equidistant_point=1000 +#num_sigmas_reject_badlabels=3 +#badlabels_rejection_percentage=0.1 + + [convolutional] batch_normalize=1 filters=32 @@ -230,6 +243,8 @@ random=0 resize=1.5 nms_kind=greedynms beta_nms=0.6 +#new_coords=1 +#scale_x_y = 2.0 [route] layers = -4 @@ -279,3 +294,5 @@ random=0 resize=1.5 nms_kind=greedynms beta_nms=0.6 +#new_coords=1 +#scale_x_y = 2.0 diff --git a/build/darknet/x64/cfg/yolov4x-mish.cfg b/build/darknet/x64/cfg/yolov4x-mish.cfg index 0e9fb44132d..2ff854f6dcc 100644 --- a/build/darknet/x64/cfg/yolov4x-mish.cfg +++ b/build/darknet/x64/cfg/yolov4x-mish.cfg @@ -26,7 +26,7 @@ mosaic=1 letter_box=1 -optimized_memory=1 +#optimized_memory=1 [convolutional] batch_normalize=1 @@ -1150,7 +1150,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1159,6 +1159,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=0 ignore_thresh = .7 truth_thresh = 1 @@ -1172,7 +1173,7 @@ iou_loss=ciou nms_kind=diounms beta_nms=0.6 new_coords=1 -max_delta=20 +max_delta=5 [route] layers = -4 @@ -1279,7 +1280,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1288,6 +1289,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 @@ -1408,7 +1410,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1417,6 +1419,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 diff --git a/cfg/cspx-p7-mish.cfg b/cfg/cspx-p7-mish.cfg index b5713391bcc..01be7283680 100644 --- a/cfg/cspx-p7-mish.cfg +++ b/cfg/cspx-p7-mish.cfg @@ -2340,7 +2340,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] @@ -2352,7 +2352,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2400,7 +2400,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear 
+activation=logistic [yolo] mask = 4,5,6,7 @@ -2411,7 +2411,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2459,7 +2459,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 8,9,10,11 @@ -2470,7 +2470,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2518,7 +2518,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 12,13,14,15 @@ -2529,7 +2529,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 @@ -2577,7 +2577,7 @@ size=1 stride=1 pad=1 filters=340 -activation=linear +activation=logistic [yolo] mask = 16,17,18,19 @@ -2588,7 +2588,7 @@ ignore_thresh = .7 truth_thresh = 1 #random=1 resize=1.5 -scale_x_y = 1.05 +scale_x_y = 2.0 jitter=.1 objectness_smooth=1 iou_thresh=0.2 diff --git a/cfg/yolov4-csp.cfg b/cfg/yolov4-csp.cfg new file mode 100644 index 00000000000..23ebfd7461d --- /dev/null +++ b/cfg/yolov4-csp.cfg @@ -0,0 +1,1277 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#optimized_memory=1 + +#23:104x104 54:52x52 85:26x26 104:13x13 for 416 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -2 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +#[route] +#layers = -1,-7 + +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=0 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=4.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -20 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 
76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -49 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/cfg/yolov4x-mish.cfg b/cfg/yolov4x-mish.cfg index 0e9fb44132d..2ff854f6dcc 100644 --- a/cfg/yolov4x-mish.cfg +++ b/cfg/yolov4x-mish.cfg @@ -26,7 +26,7 @@ mosaic=1 letter_box=1 -optimized_memory=1 +#optimized_memory=1 [convolutional] batch_normalize=1 @@ -1150,7 +1150,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1159,6 +1159,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=0 ignore_thresh = .7 truth_thresh = 1 @@ -1172,7 +1173,7 @@ iou_loss=ciou nms_kind=diounms beta_nms=0.6 new_coords=1 -max_delta=20 +max_delta=5 [route] layers = -4 @@ -1279,7 +1280,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1288,6 +1289,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 @@ -1408,7 +1410,7 @@ size=1 stride=1 pad=1 filters=255 -activation=linear +activation=logistic [yolo] @@ -1417,6 +1419,7 @@ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 classes=80 num=9 jitter=.1 +scale_x_y = 2.0 objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 diff --git a/include/darknet.h b/include/darknet.h index 3f90f0ad875..040d9e35999 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -414,6 +414,10 @@ struct layer { float *scales; float *scale_updates; + float *weights_ema; + float *biases_ema; + float *scales_ema; + float *weights; float *weight_updates; @@ -697,8 +701,15 @@ typedef struct network { int n; int batch; uint64_t *seen; + float *badlabels_reject_threshold; + float *delta_rolling_max; float *delta_rolling_avg; + float 
*delta_rolling_std; + int weights_reject_freq; int equidistant_point; + float badlabels_rejection_percentage; + float num_sigmas_reject_badlabels; + float ema_alpha; int *cur_iteration; float loss_scale; int *t; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 130f57fabb0..1d52dd1d23c 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -566,6 +566,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (train) { l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float)); l.bias_updates = (float*)xcalloc(n, sizeof(float)); + + l.weights_ema = (float*)xcalloc(l.nweights, sizeof(float)); + l.biases_ema = (float*)xcalloc(n, sizeof(float)); } } @@ -637,6 +640,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.scales[i] = 1; } if (train) { + l.scales_ema = (float*)xcalloc(n, sizeof(float)); l.scale_updates = (float*)xcalloc(n, sizeof(float)); l.mean = (float*)xcalloc(n, sizeof(float)); diff --git a/src/darknet.c b/src/darknet.c index 92a9c193035..13ab75f3d38 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -178,7 +178,7 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max) } *net.seen = 0; *net.cur_iteration = 0; - save_weights_upto(net, outfile, max); + save_weights_upto(net, outfile, max, 0); } #include "convolutional_layer.h" diff --git a/src/demo.c b/src/demo.c index c7bf4a5debe..dfbc3c5e796 100644 --- a/src/demo.c +++ b/src/demo.c @@ -171,6 +171,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(weightfile){ load_weights(&net, weightfile); } + if (net.letter_box) letter_box = 1; net.benchmark_layers = benchmark_layers; fuse_conv_batchnorm(net); calculate_binary_weights(net); diff --git a/src/detector.c b/src/detector.c index 88a04acecb7..5c84bfcb542 100644 --- a/src/detector.c +++ b/src/detector.c @@ -389,7 +389,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { //if (i % 100 == 0) { - if (iteration >= (iter_save + 1000) || iteration % 1000 == 0) { + if ((iteration >= (iter_save + 10000) || iteration % 10000 == 0) || + (iteration >= (iter_save + 1000) || iteration % 1000 == 0) && net.max_batches < 10000) + { iter_save = iteration; #ifdef GPU if (ngpus != 1) sync_nets(nets, ngpus, 0); @@ -407,6 +409,12 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i char buff[256]; sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); + + if (net.ema_alpha && is_ema_initialized(net)) { + sprintf(buff, "%s/%s_ema.weights", backup_directory, base); + save_weights_upto(net, buff, net.n, 1); + printf(" EMA weights are saved to the file: %s \n", buff); + } } free_data(train); } @@ -1007,6 +1015,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa args.w = net.w; args.h = net.h; args.c = net.c; + letter_box = net.letter_box; if (letter_box) args.type = LETTERBOX_DATA; else args.type = IMAGE_DATA; @@ -1611,6 +1620,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam if (weightfile) { load_weights(&net, weightfile); } + if (net.letter_box) letter_box = 1; net.benchmark_layers = benchmark_layers; fuse_conv_batchnorm(net); calculate_binary_weights(net); diff --git a/src/layer.c b/src/layer.c index fe3827d2ad5..032a24e0f2c 100644 --- a/src/layer.c +++ b/src/layer.c @@ -92,6 +92,9 @@ void free_layer_custom(layer l, int 
keep_cudnn_desc) if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; if (l.scales) free(l.scales), l.scales = NULL; if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; + if (l.biases_ema) free(l.biases_ema), l.biases = NULL; + if (l.scales_ema) free(l.scales_ema), l.scales = NULL; + if (l.weights_ema) free(l.weights_ema), l.weights = NULL; if (l.weights) free(l.weights), l.weights = NULL; if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; if (l.align_bit_weights) free(l.align_bit_weights); diff --git a/src/network.c b/src/network.c index a59f2e478b1..9dec1df5761 100644 --- a/src/network.c +++ b/src/network.c @@ -245,7 +245,10 @@ network make_network(int n) net.n = n; net.layers = (layer*)xcalloc(net.n, sizeof(layer)); net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t)); - net.delta_rolling_avg = (float*)xcalloc(1, sizeof(float)); + net.badlabels_reject_threshold = (float*)xcalloc(1, sizeof(float)); + net.delta_rolling_max = (float*)xcalloc(1, sizeof(float)); + net.delta_rolling_avg = (float*)xcalloc(1, sizeof(float)); + net.delta_rolling_std = (float*)xcalloc(1, sizeof(float)); net.cur_iteration = (int*)xcalloc(1, sizeof(int)); net.total_bbox = (int*)xcalloc(1, sizeof(int)); net.rewritten_bbox = (int*)xcalloc(1, sizeof(int)); @@ -422,6 +425,45 @@ float train_network_waitkey(network net, data d, int wait_key) #else // GPU update_network(net); #endif // GPU + + int ema_start_point = net.max_batches / 2; + + if (net.ema_alpha && (*net.cur_iteration) >= ema_start_point) + { + int ema_period = (net.max_batches - ema_start_point - 1000) * (1.0 - net.ema_alpha); + int ema_apply_point = net.max_batches - 1000; + + if (!is_ema_initialized(net)) + { + ema_update(net, 0); // init EMA + printf(" EMA initialization \n"); + } + + if ((*net.cur_iteration) == ema_apply_point) + { + ema_apply(net); // apply EMA (BN rolling mean/var recalculation is required) + printf(" ema_apply() \n"); + } + else + if ((*net.cur_iteration) < ema_apply_point)// && (*net.cur_iteration) % ema_period == 0) + { + ema_update(net, net.ema_alpha); // update EMA + printf(" ema_update(), ema_alpha = %f \n", net.ema_alpha); + } + } + + + int reject_stop_point = net.max_batches*3/4; + + if ((*net.cur_iteration) < reject_stop_point && + net.weights_reject_freq && + (*net.cur_iteration) % net.weights_reject_freq == 0) + { + float sim_threshold = 0.4; + reject_similar_weights(net, sim_threshold); + } + + free(X); free(y); return (float)sum/(n*batch); @@ -1182,7 +1224,10 @@ void free_network(network net) free(net.scales); free(net.steps); free(net.seen); - free(net.delta_rolling_avg); + free(net.badlabels_reject_threshold); + free(net.delta_rolling_max); + free(net.delta_rolling_avg); + free(net.delta_rolling_std); free(net.cur_iteration); free(net.total_bbox); free(net.rewritten_bbox); @@ -1483,3 +1528,138 @@ void restore_network_recurrent_state(network net) if (net.layers[k].type == CRNN) free_state_crnn(net.layers[k]); } } + + +int is_ema_initialized(network net) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (l.type == CONVOLUTIONAL) { + int k; + if (l.weights_ema) { + for (k = 0; k < l.nweights; ++k) { + if (l.weights_ema[k] != 0) return 1; + } + } + } + } + + return 0; +} + +void ema_update(network net, float ema_alpha) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (l.type == CONVOLUTIONAL) { +#ifdef GPU + if (gpu_index >= 0) { + pull_convolutional_layer(l); + } +#endif + int k; + if (l.weights_ema) { + for (k = 0; k < 
l.nweights; ++k) { + l.weights_ema[k] = ema_alpha * l.weights_ema[k] + (1 - ema_alpha) * l.weights[k]; + } + } + + for (k = 0; k < l.n; ++k) { + if (l.biases_ema) l.biases_ema[k] = ema_alpha * l.biases_ema[k] + (1 - ema_alpha) * l.biases[k]; + if (l.scales_ema) l.scales_ema[k] = ema_alpha * l.scales_ema[k] + (1 - ema_alpha) * l.scales[k]; + } + } + } +} + + +void ema_apply(network net) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (l.type == CONVOLUTIONAL) { + int k; + if (l.weights_ema) { + for (k = 0; k < l.nweights; ++k) { + l.weights[k] = l.weights_ema[k]; + } + } + + for (k = 0; k < l.n; ++k) { + if (l.biases_ema) l.biases[k] = l.biases_ema[k]; + if (l.scales_ema) l.scales[k] = l.scales_ema[k]; + } + +#ifdef GPU + if (gpu_index >= 0) { + push_convolutional_layer(l); + } +#endif + } + } +} + + + +void reject_similar_weights(network net, float sim_threshold) +{ + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + if (i == 0) continue; + if (net.n > i + 1) if (net.layers[i + 1].type == YOLO) continue; + if (net.n > i + 2) if (net.layers[i + 2].type == YOLO) continue; + if (net.n > i + 3) if (net.layers[i + 3].type == YOLO) continue; + + if (l.type == CONVOLUTIONAL && l.activation != LINEAR) { +#ifdef GPU + if (gpu_index >= 0) { + pull_convolutional_layer(l); + } +#endif + int k, j; + float max_sim = -1000; + int max_sim_index = 0; + int max_sim_index2 = 0; + int filter_size = l.size*l.size*l.c; + for (k = 0; k < l.n; ++k) + { + for (j = k+1; j < l.n; ++j) + { + int w1 = k; + int w2 = j; + + float sim = cosine_similarity(&l.weights[filter_size*w1], &l.weights[filter_size*w2], filter_size); + if (sim > max_sim) { + max_sim = sim; + max_sim_index = w1; + max_sim_index2 = w2; + } + } + } + + printf(" reject_similar_weights: i = %d, l.n = %d, w1 = %d, w2 = %d, sim = %f, thresh = %f \n", + i, l.n, max_sim_index, max_sim_index2, max_sim, sim_threshold); + + if (max_sim > sim_threshold) { + printf(" rejecting... \n"); + float scale = sqrt(2. 
/ (l.size*l.size*l.c / l.groups));
+
+            for (k = 0; k < filter_size; ++k) {
+                l.weights[max_sim_index*filter_size + k] = scale*rand_uniform(-1, 1);
+            }
+            if (l.biases) l.biases[max_sim_index] = 0.0f;
+            if (l.scales) l.scales[max_sim_index] = 1.0f;
+        }
+
+#ifdef GPU
+        if (gpu_index >= 0) {
+            push_convolutional_layer(l);
+        }
+#endif
+    }
+    }
+}

diff --git a/src/network.h b/src/network.h
index 42d68d6c3c3..7661c8ef806 100644
--- a/src/network.h
+++ b/src/network.h
@@ -171,6 +171,11 @@ void free_network_recurrent_state(network net);
 void randomize_network_recurrent_state(network net);
 void remember_network_recurrent_state(network net);
 void restore_network_recurrent_state(network net);
+int is_ema_initialized(network net);
+void ema_update(network net, float ema_alpha);
+void ema_apply(network net);
+void reject_similar_weights(network net, float sim_threshold);
+
 #ifdef __cplusplus
 }

diff --git a/src/parser.c b/src/parser.c
index 74d60c8e306..eb41e91457b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -454,7 +454,7 @@ layer parse_yolo(list *options, size_params params)
     }
     //assert(l.outputs == params.inputs);

-    l.show_details = option_find_int_quiet(options, "show_details", 0);
+    l.show_details = option_find_int_quiet(options, "show_details", 1);
     l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10
     char *cpc = option_find_str(options, "counters_per_class", 0);
     l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta);
@@ -1155,8 +1155,15 @@ void parse_net_options(list *options, network *net)
     net->batch *= net->time_steps; // mini_batch * time_steps
     net->subdivisions = subdivs; // number of mini_batches

+    net->weights_reject_freq = option_find_int_quiet(options, "weights_reject_freq", 0);
     net->equidistant_point = option_find_int_quiet(options, "equidistant_point", 0);
+    net->badlabels_rejection_percentage = option_find_float_quiet(options, "badlabels_rejection_percentage", 0);
+    net->num_sigmas_reject_badlabels = option_find_float_quiet(options, "num_sigmas_reject_badlabels", 0);
+    net->ema_alpha = option_find_float_quiet(options, "ema_alpha", 0);
+    *net->badlabels_reject_threshold = 0;
+    *net->delta_rolling_max = 0;
     *net->delta_rolling_avg = 0;
+    *net->delta_rolling_std = 0;
     *net->seen = 0;
     *net->cur_iteration = 0;
     net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
@@ -1819,6 +1826,31 @@ void save_convolutional_weights(layer l, FILE *fp)
     //}
 }

+void save_convolutional_weights_ema(layer l, FILE *fp)
+{
+    if (l.binary) {
+        //save_convolutional_weights_binary(l, fp);
+        //return;
+    }
+#ifdef GPU
+    if (gpu_index >= 0) {
+        pull_convolutional_layer(l);
+    }
+#endif
+    int num = l.nweights;
+    fwrite(l.biases_ema, sizeof(float), l.n, fp);
+    if (l.batch_normalize) {
+        fwrite(l.scales_ema, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    fwrite(l.weights_ema, sizeof(float), num, fp);
+    //if(l.adam){
+    //    fwrite(l.m, sizeof(float), num, fp);
+    //    fwrite(l.v, sizeof(float), num, fp);
+    //}
+}
+
 void save_batchnorm_weights(layer l, FILE *fp)
 {
 #ifdef GPU
@@ -1848,7 +1880,7 @@ void save_connected_weights(layer l, FILE *fp)
     }
 }

-void save_weights_upto(network net, char *filename, int cutoff)
+void save_weights_upto(network net, char *filename, int cutoff, int save_ema)
 {
 #ifdef GPU
     if(net.gpu_index >= 0){
@@ -1872,7 +1904,12 @@ void save_weights_upto(network net, char *filename, int cutoff)
     for(i = 0; i < net.n && i < cutoff; ++i){
         layer l = net.layers[i];
         if (l.type == CONVOLUTIONAL && l.share_layer == NULL) {
-            save_convolutional_weights(l, fp);
+            if (save_ema) {
+                save_convolutional_weights_ema(l, fp);
+            }
+            else {
+                save_convolutional_weights(l, fp);
+            }
         }
         if (l.type == SHORTCUT && l.nweights > 0) { save_shortcut_weights(l, fp); }
         if(l.type == CONNECTED){
@@ -1935,7 +1972,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
 }
 void save_weights(network net, char *filename)
 {
-    save_weights_upto(net, filename, net.n);
+    save_weights_upto(net, filename, net.n, 0);
 }

 void transpose_matrix(float *a, int rows, int cols)
diff --git a/src/parser.h b/src/parser.h
index 5e0d3fd1295..05241167d20 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -9,7 +9,7 @@ network parse_network_cfg(char *filename);
 network parse_network_cfg_custom(char *filename, int batch, int time_steps);
 void save_network(network net, char *filename);
 void save_weights(network net, char *filename);
-void save_weights_upto(network net, char *filename, int cutoff);
+void save_weights_upto(network net, char *filename, int cutoff, int save_ema);
 void save_weights_double(network net, char *filename);
 void load_weights(network *net, char *filename);
 void load_weights_upto(network *net, char *filename, int cutoff);
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index d756d12242a..cd06c0fb7ad 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -138,8 +138,8 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw
     // w = ln(t.w * net.w / anchors_w); // w - output of previous conv-layer
     // h = ln(t.h * net.h / anchors_h); // h - output of previous conv-layer
     if (new_coords) {
-        b.x = (i + x[index + 0 * stride] * 2 - 0.5) / lw;
-        b.y = (j + x[index + 1 * stride] * 2 - 0.5) / lh;
+        b.x = (i + x[index + 0 * stride]) / lw;
+        b.y = (j + x[index + 1 * stride]) / lh;
         b.w = x[index + 2 * stride] * x[index + 2 * stride] * 4 * biases[2 * n] / w;
         b.h = x[index + 3 * stride] * x[index + 3 * stride] * 4 * biases[2 * n + 1] / h;
     }
@@ -197,8 +197,8 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
     float th = log(truth.h*h / biases[2 * n + 1]);

     if (new_coords) {
-        tx = (truth.x*lw - i + 0.5) / 2;
-        ty = (truth.y*lh - j + 0.5) / 2;
+        //tx = (truth.x*lw - i + 0.5) / 2;
+        //ty = (truth.y*lh - j + 0.5) / 2;
         tw = sqrt(truth.w*w / (4 * biases[2 * n]));
         th = sqrt(truth.h*h / (4 * biases[2 * n + 1]));
     }
@@ -230,16 +230,28 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
     float dw = all_ious.dx_iou.dl;
     float dh = all_ious.dx_iou.dr;

+    // predict exponential, apply gradient of e^delta_t ONLY for w,h
     if (new_coords) {
-        dw *= 8 * x[index + 2 * stride];
-        dh *= 8 * x[index + 3 * stride];
+        //dw *= 8 * x[index + 2 * stride];
+        //dh *= 8 * x[index + 3 * stride];
+        //dw *= 8 * x[index + 2 * stride] * biases[2 * n] / w;
+        //dh *= 8 * x[index + 3 * stride] * biases[2 * n + 1] / h;
+
+        //float grad_w = 8 * exp(-x[index + 2 * stride]) / pow(exp(-x[index + 2 * stride]) + 1, 3);
+        //float grad_h = 8 * exp(-x[index + 3 * stride]) / pow(exp(-x[index + 3 * stride]) + 1, 3);
+        //dw *= grad_w;
+        //dh *= grad_h;
     }
     else {
         dw *= exp(x[index + 2 * stride]);
         dh *= exp(x[index + 3 * stride]);
     }
+
+    //dw *= exp(x[index + 2 * stride]);
+    //dh *= exp(x[index + 3 * stride]);
+
     // normalize iou weight
     dx *= iou_normalizer;
     dy *= iou_normalizer;
@@ -368,6 +380,8 @@ typedef struct train_yolo_args {
     int b;

     float tot_iou;
+    float tot_giou_loss;
+    float tot_iou_loss;
     int count;
     int class_count;
 } train_yolo_args;
@@ -388,8 +402,8 @@ void *process_batch(void* ptr)
     float tot_giou = 0;
     float tot_diou = 0;
     float tot_ciou = 0;
-    float tot_iou_loss = 0;
-    float tot_giou_loss = 0;
+    //float tot_iou_loss = 0;
+    //float tot_giou_loss = 0;
     float tot_diou_loss = 0;
     float tot_ciou_loss = 0;
     float recall = 0;
@@ -540,10 +554,10 @@ void *process_batch(void* ptr)
                         // range is 0 <= 1
                         args->tot_iou += all_ious.iou;
-                        tot_iou_loss += 1 - all_ious.iou;
+                        args->tot_iou_loss += 1 - all_ious.iou;
                         // range is -1 <= giou <= 1
                         tot_giou += all_ious.giou;
-                        tot_giou_loss += 1 - all_ious.giou;
+                        args->tot_giou_loss += 1 - all_ious.giou;

                         tot_diou += all_ious.diou;
                         tot_diou_loss += 1 - all_ious.diou;
@@ -592,10 +606,10 @@ void *process_batch(void* ptr)
                 // range is 0 <= 1
                 args->tot_iou += all_ious.iou;
-                tot_iou_loss += 1 - all_ious.iou;
+                args->tot_iou_loss += 1 - all_ious.iou;
                 // range is -1 <= giou <= 1
                 tot_giou += all_ious.giou;
-                tot_giou_loss += 1 - all_ious.giou;
+                args->tot_giou_loss += 1 - all_ious.giou;

                 tot_diou += all_ious.diou;
                 tot_diou_loss += 1 - all_ious.diou;
@@ -656,16 +670,16 @@ void forward_yolo_layer(const layer l, network_state state)
 #ifndef GPU
     for (b = 0; b < l.batch; ++b) {
         for (n = 0; n < l.n; ++n) {
-            int index = entry_index(l, b, n*l.w*l.h, 0);
+            int bbox_index = entry_index(l, b, n*l.w*l.h, 0);
             if (l.new_coords) {
-                activate_array(l.output + index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
+                //activate_array(l.output + bbox_index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
             }
             else {
-                activate_array(l.output + index, 2 * l.w*l.h, LOGISTIC); // x,y,
+                activate_array(l.output + bbox_index, 2 * l.w*l.h, LOGISTIC); // x,y,
+                int obj_index = entry_index(l, b, n*l.w*l.h, 4);
+                activate_array(l.output + obj_index, (1 + l.classes)*l.w*l.h, LOGISTIC);
             }
-            scal_add_cpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale x,y
-            index = entry_index(l, b, n*l.w*l.h, 4);
-            activate_array(l.output + index, (1 + l.classes)*l.w*l.h, LOGISTIC);
+            scal_add_cpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + bbox_index, 1); // scale x,y
         }
     }
 #endif
@@ -708,6 +722,8 @@ void forward_yolo_layer(const layer l, network_state state)
         yolo_args[b].b = b;
         yolo_args[b].tot_iou = 0;
+        yolo_args[b].tot_iou_loss = 0;
+        yolo_args[b].tot_giou_loss = 0;
         yolo_args[b].count = 0;
         yolo_args[b].class_count = 0;

@@ -719,6 +735,8 @@ void forward_yolo_layer(const layer l, network_state state)
         pthread_join(threads[b], 0);

         tot_iou += yolo_args[b].tot_iou;
+        tot_iou_loss += yolo_args[b].tot_iou_loss;
+        tot_giou_loss += yolo_args[b].tot_giou_loss;
         count += yolo_args[b].count;
         class_count += yolo_args[b].class_count;
     }
@@ -728,24 +746,113 @@ void forward_yolo_layer(const layer l, network_state state)
     // Search for an equidistant point from the distant boundaries of the local minimum
     int iteration_num = get_current_iteration(state.net);
+    const int start_point = state.net.max_batches * 3 / 4;
     //printf(" equidistant_point ep = %d, it = %d \n", state.net.equidistant_point, iteration_num);
-    if (state.net.equidistant_point && state.net.equidistant_point < iteration_num) {
-        float progress_it = iteration_num - state.net.equidistant_point;
-        float progress = progress_it / (state.net.max_batches - state.net.equidistant_point);
-        float loss_threshold = (*state.net.delta_rolling_avg) * progress;
-        printf(" RUN equidistant_point loss_threshold = %f, ep = %d, it = %d \n", loss_threshold, state.net.equidistant_point, iteration_num);
+    if ((state.net.badlabels_rejection_percentage && start_point < iteration_num) ||
+        (state.net.num_sigmas_reject_badlabels && start_point < iteration_num) ||
+        (state.net.equidistant_point && state.net.equidistant_point < iteration_num))
+    {
+        const float progress_it = iteration_num - state.net.equidistant_point;
+        const float progress = progress_it / (state.net.max_batches - state.net.equidistant_point);
+        float ep_loss_threshold = (*state.net.delta_rolling_avg) * progress;
         float cur_max = 0;
+        float cur_avg = 0;
+        float counter = 0;
         for (i = 0; i < l.batch * l.outputs; ++i) {
-            if (cur_max < fabs(l.delta[i]))
-                cur_max = fabs(l.delta[i]);
-            if (fabs(l.delta[i]) < loss_threshold)
-                l.delta[i] = 0;
+            if (l.delta[i] != 0) {
+                counter++;
+                cur_avg += fabs(l.delta[i]);
+
+                if (cur_max < fabs(l.delta[i]))
+                    cur_max = fabs(l.delta[i]);
+            }
+        }
+
+        cur_avg = cur_avg / counter;
+
+        if (*state.net.delta_rolling_max == 0) *state.net.delta_rolling_max = cur_max;
+        *state.net.delta_rolling_max = *state.net.delta_rolling_max * 0.99 + cur_max * 0.01;
+        *state.net.delta_rolling_avg = *state.net.delta_rolling_avg * 0.99 + cur_avg * 0.01;
+
+        // reject high loss to filter bad labels
+        if (state.net.num_sigmas_reject_badlabels && start_point < iteration_num)
+        {
+            const float rolling_std = (*state.net.delta_rolling_std);
+            const float rolling_max = (*state.net.delta_rolling_max);
+            const float rolling_avg = (*state.net.delta_rolling_avg);
+            const float progress_badlabels = (float)(iteration_num - start_point) / (start_point);
+
+            float cur_std = 0;
+            float counter = 0;
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (l.delta[i] != 0) {
+                    counter++;
+                    cur_std += pow(l.delta[i] - rolling_avg, 2);
+                }
+            }
+            cur_std = sqrt(cur_std / counter);
+
+            *state.net.delta_rolling_std = *state.net.delta_rolling_std * 0.99 + cur_std * 0.01;
+
+            float final_badlebels_threshold = rolling_avg + rolling_std * state.net.num_sigmas_reject_badlabels;
+            float badlabels_threshold = rolling_max - progress_badlabels * fabs(rolling_max - final_badlebels_threshold);
+            badlabels_threshold = max_val_cmp(final_badlebels_threshold, badlabels_threshold);
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (fabs(l.delta[i]) > badlabels_threshold)
+                    l.delta[i] = 0;
+            }
+            printf(" rolling_std = %f, rolling_max = %f, rolling_avg = %f \n", rolling_std, rolling_max, rolling_avg);
+            printf(" badlabels loss_threshold = %f, start_it = %d, progress = %f \n", badlabels_threshold, start_point, progress_badlabels *100);
+
+            ep_loss_threshold = min_val_cmp(final_badlebels_threshold, rolling_avg) * progress;
+        }
+
+
+        // reject some percent of the highest deltas to filter bad labels
+        if (state.net.badlabels_rejection_percentage && start_point < iteration_num) {
+            if (*state.net.badlabels_reject_threshold == 0)
+                *state.net.badlabels_reject_threshold = *state.net.delta_rolling_max;
+
+            printf(" badlabels_reject_threshold = %f \n", *state.net.badlabels_reject_threshold);
+
+            const float num_deltas_per_anchor = (l.classes + 4 + 1);
+            float counter_reject = 0;
+            float counter_all = 0;
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (l.delta[i] != 0) {
+                    counter_all++;
+                    if (fabs(l.delta[i]) > (*state.net.badlabels_reject_threshold)) {
+                        counter_reject++;
+                        l.delta[i] = 0;
+                    }
+                }
+            }
+            float cur_percent = 100 * (counter_reject*num_deltas_per_anchor / counter_all);
+            if (cur_percent > state.net.badlabels_rejection_percentage) {
+                *state.net.badlabels_reject_threshold += 0.01;
+                printf(" increase!!! \n");
+            }
+            else if (*state.net.badlabels_reject_threshold > 0.01) {
+                *state.net.badlabels_reject_threshold -= 0.01;
+                printf(" decrease!!! \n");
+            }
+
+            printf(" badlabels_reject_threshold = %f, cur_percent = %f, badlabels_rejection_percentage = %f, delta_rolling_max = %f \n",
+                *state.net.badlabels_reject_threshold, cur_percent, state.net.badlabels_rejection_percentage, *state.net.delta_rolling_max);
+        }
+
+
+        // reject low loss to find equidistant point
+        if (state.net.equidistant_point && state.net.equidistant_point < iteration_num) {
+            printf(" equidistant_point loss_threshold = %f, start_it = %d, progress = %3.1f %% \n", ep_loss_threshold, state.net.equidistant_point, progress * 100);
+            for (i = 0; i < l.batch * l.outputs; ++i) {
+                if (fabs(l.delta[i]) < ep_loss_threshold)
+                    l.delta[i] = 0;
+            }
         }
-
-        *state.net.delta_rolling_avg = *state.net.delta_rolling_avg * 0.99 + cur_max * 0.01;
     }

     if (count == 0) count = 1;
@@ -790,7 +897,7 @@ void forward_yolo_layer(const layer l, network_state state)
     float avg_iou_loss = 0;
     *(l.cost) = loss;
-    /*
+
     // gIOU loss + MSE (objectness) loss
     if (l.iou_loss == MSE) {
         *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
@@ -807,7 +914,7 @@ void forward_yolo_layer(const layer l, network_state state)
         }
         *(l.cost) = avg_iou_loss + classification_loss;
     }
-    */
+
     loss /= l.batch;
     classification_loss /= l.batch;

@@ -1068,20 +1175,21 @@ void forward_yolo_layer_gpu(const layer l, network_state state)
     int b, n;
     for (b = 0; b < l.batch; ++b){
         for(n = 0; n < l.n; ++n){
-            int index = entry_index(l, b, n*l.w*l.h, 0);
+            int bbox_index = entry_index(l, b, n*l.w*l.h, 0);
             // y = 1./(1. + exp(-x))
             // x = ln(y/(1-y))  // ln - natural logarithm (base = e)
             // if(y->1) x -> inf
             // if(y->0) x -> -inf
             if (l.new_coords) {
-                activate_array_ongpu(l.output_gpu + index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
+                //activate_array_ongpu(l.output_gpu + bbox_index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h
             }
             else {
-                activate_array_ongpu(l.output_gpu + index, 2 * l.w*l.h, LOGISTIC); // x,y
+                activate_array_ongpu(l.output_gpu + bbox_index, 2 * l.w*l.h, LOGISTIC); // x,y
+
+                int obj_index = entry_index(l, b, n*l.w*l.h, 4);
+                activate_array_ongpu(l.output_gpu + obj_index, (1 + l.classes)*l.w*l.h, LOGISTIC); // classes and objectness
             }
-            if (l.scale_x_y != 1) scal_add_ongpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale x,y
-            index = entry_index(l, b, n*l.w*l.h, 4);
-            activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); // classes and objectness
+            if (l.scale_x_y != 1) scal_add_ongpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + bbox_index, 1); // scale x,y
         }
     }
     if(!state.train || l.onlyforward){
diff --git a/src/yolo_layer.h b/src/yolo_layer.h
index 8de44b45517..08883b0f244 100644
--- a/src/yolo_layer.h
+++ b/src/yolo_layer.h
@@ -20,7 +20,7 @@ void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth

 #ifdef GPU
 void forward_yolo_layer_gpu(const layer l, network_state state);
-void backward_yolo_layer_gpu(layer l, network_state state);
+void backward_yolo_layer_gpu(const layer l, network_state state);
 #endif

 #ifdef __cplusplus

From 4fddf7c9457c64af65856d7de7bf0a40e4ada8fb Mon Sep 17 00:00:00 2001
From: edwardxliu
Date: Tue, 15 Dec 2020 09:29:42 +0800
Subject: [PATCH 14/20] update

---
 Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 743404c1ce2..a089031f779 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,14 @@
-GPU=1
-CUDNN=1
+GPU=0
+CUDNN=0
 CUDNN_HALF=0
-OPENCV=1
+OPENCV=0
 AVX=0
 OPENMP=0
-LIBSO=1
+LIBSO=0
 ZED_CAMERA=0
 ZED_CAMERA_v2_8=0
-STREAM=1
-FFMPEG=1
+STREAM=0
+FFMPEG=0 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -17,7 +17,7 @@ FFMPEG=1 # set ZED_CAMERA_v2_8=1 to enable ZED SDK 2.X USE_CPP=0 -DEBUG=1 +DEBUG=0 ARCH= -gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_50,code=[sm_50,compute_50] \ From b141cef077c79e63bbc21587622b67a8f5244b88 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 09:47:11 +0800 Subject: [PATCH 15/20] minor fix --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a089031f779..85925cd238b 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ endif CPP=g++ -std=c++11 NVCC=nvcc OPTS=-Ofast -LDFLAGS=-L/usr/local/lib -lm -pthread +LDFLAGS= -lm -pthread COMMON= -Iinclude/ -I3rdparty/stb/include CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -fpermissive @@ -97,7 +97,7 @@ COMMON+= `pkg-config --cflags libswresample libswscale libavutil libavcodec liba endif ifeq ($(DEBUG), 1) -OPTS= -O0 -g +#OPTS= -O0 -g #OPTS= -Og -g COMMON+= -DDEBUG CFLAGS+= -DDEBUG From 3298cbe60e132df17e1496a7477295cfa69fbda0 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 10:26:05 +0800 Subject: [PATCH 16/20] update --- .github/workflows/ccpp.yml | 1 + .github/workflows/ccpp.yml.bk | 594 ++++++++++++++++++++++++++++++++++ 2 files changed, 595 insertions(+) create mode 100644 .github/workflows/ccpp.yml.bk diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 60482a5a355..3f3cd4b062a 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -82,6 +82,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true' vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_linux.diff uses: lukka/run-vcpkg@v2 with: diff --git a/.github/workflows/ccpp.yml.bk b/.github/workflows/ccpp.yml.bk new file mode 100644 index 00000000000..60482a5a355 --- /dev/null +++ b/.github/workflows/ccpp.yml.bk @@ -0,0 +1,594 @@ +name: Darknet Continuous Integration + +on: [push, pull_request] + +jobs: + ubuntu-makefile: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - name: 'Install CUDA' + run: | + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub + sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt update + sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 + sudo apt-get install -y --no-install-recommends libcudnn7-dev + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/libcuda.so + sudo ln -s /usr/local/cuda-10.2 /usr/local/cuda + export PATH=/usr/local/cuda/bin:$PATH + export 
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + nvcc --version + gcc --version + + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1 -j 8 + make clean + + + ubuntu-vcpkg: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_linux.diff + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeBuildType: 'Release' + cmakeAppendedArgs: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release" + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - 
uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu-cuda: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + env: + CUDACXX: "/usr/local/cuda-10.2/bin/nvcc" + CUDA_PATH: "/usr/local/cuda-10.2" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda-10.2" + LD_LIBRARY_PATH: "/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/lib64/stubs:$LD_LIBRARY_PATH" + run: | + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub + sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt update + sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 + sudo apt-get install -y --no-install-recommends libcudnn7-dev + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/stubs/libcuda.so.1 + + - name: 'Build with CMake and Ninja' + env: + CUDACXX: "/usr/local/cuda-10.2/bin/nvcc" + CUDA_PATH: "/usr/local/cuda-10.2" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda-10.2" + LD_LIBRARY_PATH: "/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/lib64/stubs:$LD_LIBRARY_PATH" + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu-no-ocv-cpp: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release -DBUILD_AS_CPP:BOOL=TRUE" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + + osx-vcpkg: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install libomp yasm + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ 
github.workspace }}/cmake/vcpkg_osx.diff + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeBuildType: 'Release' + cmakeAppendedArgs: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release" + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + osx: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install opencv libomp + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + osx-no-ocv-no-omp-cpp: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release -DBUILD_AS_CPP:BOOL=TRUE" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--target install' + + + win-vcpkg: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows.diff + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + 
buildWithCMakeArgs: '--config Release --target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ runner.workspace }}/buildDirectory/Release/*.dll + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + win-vcpkg-cuda: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: 'Install CUDA' + run: | + choco install cuda --version=10.2.89.20191206 -y + $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + + - uses: lukka/get-cmake@latest + + - name: Restore from cache and run vcpkg + env: + vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows_cuda.diff + CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDACXX: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + + uses: lukka/run-vcpkg@v2 + with: + vcpkgArguments: '@${{ env.vcpkgResponseFile }}' + vcpkgDirectory: '${{ github.workspace }}/vcpkg' + vcpkgGitCommitId: '8121b4ec3d6a11353daf7639ed9082a78e617a2e' + appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} + + - name: 'Build with CMake and Ninja' + env: + CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDACXX: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ runner.workspace }}/buildDirectory/Release/*.dll + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + win-integrated-libs: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: 
"-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/3rdparty/pthreads/bin/*.dll + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + win-intlibs-cpp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release -DBUILD_AS_CPP:BOOL=TRUE" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + + win-intlibs-cuda: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: 'Install CUDA' + run: | + choco install cuda --version=10.2.89.20191206 -y + $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + env: + CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + CUDACXX: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + + mingw: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake and Ninja' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-G\"MinGW Makefiles\" -DCMAKE_BUILD_TYPE=Release" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' + + + cygwin: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Install Cygwin' + run: | + choco install cygwin -y + choco install cyg-get -y + cyg-get gcc-g++ cmake make libopencv-devel libncurses-devel + + - name: 'Build' + run: | + mkdir buildDirectory + cd buildDirectory + path C:\tools\cygwin\bin + bash -c 'cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE="Release"' + bash -c 'cmake --build . 
--target install -- -j8' + shell: cmd From 2067e3873f4277097763d63e658a134680a0570c Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 12:25:12 +0800 Subject: [PATCH 17/20] update --- .github/workflows/ccpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 3f3cd4b062a..6f40067f4c2 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -82,7 +82,7 @@ jobs: - name: Restore from cache and run vcpkg env: - ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true' + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_linux.diff uses: lukka/run-vcpkg@v2 with: From 25ffaf4ce49ae0fb657a089a0ccfa128b6ae68c7 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 12:31:13 +0800 Subject: [PATCH 18/20] fix set-env issue --- .github/workflows/ccpp.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 6f40067f4c2..cf383f6b169 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -255,6 +255,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_osx.diff uses: lukka/run-vcpkg@v2 with: @@ -359,6 +360,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows.diff uses: lukka/run-vcpkg@v2 with: @@ -415,6 +417,7 @@ jobs: - name: Restore from cache and run vcpkg env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true vcpkgResponseFile: ${{ github.workspace }}/cmake/vcpkg_windows_cuda.diff CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" From 22a44da9f8d01b39f54e6e36dc4d8ff7d09cff6f Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 12:49:19 +0800 Subject: [PATCH 19/20] fix set-env issue --- .github/workflows/ccpp.yml | 2 ++ Makefile | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index cf383f6b169..085507fd319 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -370,6 +370,8 @@ jobs: appendedCacheKey: ${{ hashFiles(env.vcpkgResponseFile) }} - name: 'Build with CMake and Ninja' + env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true uses: lukka/run-cmake@v2 with: cmakeListsOrSettingsJson: CMakeListsTxtAdvanced diff --git a/Makefile b/Makefile index 85925cd238b..f42109ba1ce 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -GPU=0 -CUDNN=0 +GPU=1 +CUDNN=1 CUDNN_HALF=0 -OPENCV=0 +OPENCV=1 AVX=0 OPENMP=0 LIBSO=0 From b1538bfba1e37f81709f09695ebec0fb56941a52 Mon Sep 17 00:00:00 2001 From: edwardxliu Date: Tue, 15 Dec 2020 13:55:02 +0800 Subject: [PATCH 20/20] fix set-env issue --- .github/workflows/ccpp.yml | 1 + Makefile | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 085507fd319..ba696defcee 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -435,6 +435,7 @@ jobs: - name: 'Build with CMake and Ninja' env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true CUDA_PATH: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDA_PATH_V10_2: "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDA_TOOLKIT_ROOT_DIR: "C:\\Program\ Files\\NVIDIA GPU Computing 
Toolkit\\CUDA\\v10.2"
diff --git a/Makefile b/Makefile
index f42109ba1ce..85925cd238b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
-GPU=1
-CUDNN=1
+GPU=0
+CUDNN=0
 CUDNN_HALF=0
-OPENCV=1
+OPENCV=0
 AVX=0
 OPENMP=0
 LIBSO=0
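
Note on the EMA additions earlier in this series (the ema_alpha option in parser.c, the ema_update()/ema_apply() declarations in network.h, and save_convolutional_weights_ema()): they follow the standard exponential-moving-average scheme for model weights. The snippet below is only a rough sketch of that idea, using hypothetical names; it is not the code added by these patches.

    #include <stddef.h>

    /* Illustrative EMA update: shadow[] tracks a smoothed copy of the live
     * weights w[]. With alpha close to 1 (e.g. 0.9998) the shadow weights
     * change slowly; saving shadow[] instead of w[] at checkpoint time is
     * what the save_ema path of save_weights_upto() enables above. */
    static void ema_update_sketch(float *shadow, const float *w, size_t n, float alpha)
    {
        for (size_t i = 0; i < n; ++i) {
            shadow[i] = alpha * shadow[i] + (1.0f - alpha) * w[i];
        }
    }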