YOLO9000 detection

I am a complete novice; if anything here is wrong, please point it out and correct me.

Running YOLO on still images does not require powerful hardware: it works without CUDA or OpenCV. In CPU mode, detecting one image takes about 6–7 seconds.

The following is the process of running the yolo code:

First, clone the code from the official repository. If you do not want to train your own model, also download the pre-trained weights (a full version and a faster "tiny" version).

Clone: git clone https://github.com/pjreddie/darknet

Download the two pre-trained models and place them in the darknet folder:

http://pjreddie.com/media/files/yolo.weights
http://pjreddie.com/media/files/tiny-yolo-voc.weights
Test image:
cd darknet
make
./darknet detect cfg/yolo.cfg yolo.weights data/dog.jpg
This command runs the main function of darknet.c, which is in the src folder. The main function
mostly inspects the command-line arguments, which are separated by spaces. The main part of the main function is shown below:
/*
 * Entry point of darknet (excerpt: the rest of the dispatch chain is elided
 * by the "..." on the last line).
 *
 * With fewer than two arguments it prints a usage line to stderr and
 * returns 0. Otherwise it settles gpu_index and then dispatches on the
 * sub-command string in argv[1].
 */
int main(int argc, char **argv) #argc represents the number of input parameters, argv is the content of the input parameters
{
    //test_resize("data/bad.jpg");
    // test_box();
    //test_convolutional_layer();
    /* No sub-command given: print usage and stop. */
    if(argc < 2){
        fprintf(stderr, "usage: %s <function>\n", argv[0]);
        return 0;
    }
    /* Presumably the value following "-i" on the command line, 0 if absent. */
    gpu_index = find_int_arg(argc, argv , "-i", 0);
    /* "-nogpu" overrides the index with -1. */
    if(find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
    }

    /* Builds without the GPU macro always use gpu_index = -1; GPU builds
     * call cuda_set_device only for a non-negative index. */
#ifndef GPU
    gpu_index = -1;
#else
    if(gpu_index >= 0){
        cuda_set_device( gpu_index);
    }
#endif

    /* Dispatch on the sub-command name in argv[1]. */
 if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "voxel")){
        run_voxel(argc, argv);
    } else if (0 == strcmp(argv[1], "super")){
        run_super(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        run_detector(argc, argv);
    } else if (0 == strcmp(argv[1], "detect")){
        /* "detect" is a shortcut to test_detector with "cfg/coco.data" as the
         * data file: argv[2] is the network cfg, argv[3] the weights, and
         * argv[4] (when present) the image path. The threshold comes from
         * "-thresh" (fallback .24); the hierarchical threshold is fixed at .5. */
        float thresh = find_float_arg(argc, argv, "-thresh", .24);
        char *filename = (argc > 4) ? argv[4]: 0;
        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5);
...}
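This function inspects the arguments of the command we typed. When testing an image it matches the keyword "detect", then reads the threshold (default 0.24 if not given)
and the image path (if none is given, you will be prompted to enter one), and finally calls the test_detector function. That function is shown below, together with some notes of mine: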
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh)
{
    list *options = read_data_cfg(datacfg);#读取数据文件
    char *name_list = option_find_str(options, "names", "data/names.list");  
     #读取namelist(coco.name)
    char **names = get_labels(name_list);
     #读取标签
    image **alphabet = load_alphabet();
    #读取labels下面的图片
    network net = parse_network_cfg(cfgfile);
    #读取网络架构
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    #将网络的batch设置为1
    srand(2222222);
    clock_t time;#开始计时
    char buff[256];
    char *input = buff;
    int j;
    float nms=.4; #nms阈值
    while(1){
        if(filename){
            strncpy(input, filename, 256); #复制图片路径
        } else {
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);#
        image sized = resize_image(im, net.w, net.h);#将图片的resize到416*416
        layer l = net.layers[net.n-1];#网络最后一层

        box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); #分配box的空间
        float **probs = calloc(l.w*l.h*l.n, sizeof(float *));#分配分数的空间
        for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));

        float *X = sized.data;  #resize之后的图片
        time=clock();
        network_predict(net, X);%开始检测图片,返回最后一层的输出
        printf("%s: Predicted finised in %f seconds.\n", input, sec(clock()-time));
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);#得到预测的所有框
        if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
#极大值抑制
        draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
#输出各个框的置信度得分以及画出这些框
        save_image(im, "predictions");
        show_image(im, "predictions");

        free_image(im);
        free_image(sized);
        free(boxes);
        free_ptrs((void **)probs, l.w*l.h*l.n);
#ifdef OPENCV
        cvWaitKey(0);
        cvDestroyAllWindows();
#endif
        if (filename) break;
    }
}
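Every function used above can be found in the other .c files; look them up if anything is unclear.
The following shows how to run YOLO detection on a webcam or on a video file (this requires CUDA and OpenCV to be installed):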
./darknet detector demo cfg/coco.data cfg/yolo.cfg yolo.weights
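This command opens the webcam for real-time detection. The classes that can be detected are listed in coco.names in the data folder; you can edit this file yourself.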
./darknet detector demo cfg/coco.data cfg/yolo.cfg yolo.weights <video file>
This command runs detection on a video; the video file should be placed in the darknet root directory.
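Both the webcam case and the video case go through the run_detector() function, whose code is shown below. If you are already familiar with the detection process above,
the following code should not be hard to read.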
void run_detector(int argc, char **argv)
{
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .24);
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = calloc(ngpus, sizeof(int));
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',')+1;
        }
    } else {
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) {
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);
    }
}
...
To be continued.

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325406238&siteId=291194637