C++는 YOLO 대상 인식 이미지 전처리 및 후처리를 구현합니다.

【배경】

왜 YOLO 표적 인식의 이미지 후처리 부분을 직접 작성해야 합니까? 자체 개발한 FPGA 개발 보드를 사용하고 그 위에 YOLO 타겟 인식 가속 컴퓨팅 회로를 개발 및 배포했기 때문에 PS 측 개발자로서 입력 이미지를 지정된 DDR 주소로 보내고 FPGA 가속 컴퓨팅 레지스터를 활성화해야 합니다. 계산 결과가 인터럽트를 발생시키기를 기다리고, 대상 인식 결과의 원본 데이터를 검색하고, 후처리 작업을 통해 이를 복원하고, 원본 이미지에 결과를 표시합니다.

【드라이 정보】

더 이상 고민하지 말고 바로 코드를 살펴보겠습니다!

다음 코드를 통해 최소한 다음 작업을 배워야 합니다.

1. 폴더에서 데이터 세트 이미지를 읽는 방법 알아보기

2. FPGA 결과에 대한 역양자화, 디코딩, 억제 최대화 및 기타 기본 작업 방법을 알아보세요.

3. 인식 결과에 라벨을 붙이고 저장하는 방법을 알아보세요.

4. 레지스터 작동을 통해 FPGA 작동을 제어하는 방법을 배웁니다.

// Scratch buffer used by draw_objects() for label text (shared; not thread-safe).
static char text[256];
// Values per anchor prediction: 4 box params + 1 objectness + kind_nums class scores.
static int length = 5+5;
// Number of object classes (must match label_name below).
static int kind_nums = 5;
// Anchor (w, h) pairs for output layer 0 — presumably in grid-cell units; confirm against training config.
static float anchors_0[6] = {2.53125f, 2.5625f, 4.21875f, 5.28125f, 10.75f, 9.96875f};
// Anchor (w, h) pairs for output layer 1.
static float anchors_1[6] = {1.4375f, 1.6875f, 2.3125f, 3.625f, 5.0625f, 5.125f};
// Minimum objectness * class probability required to keep a detection.
static float conf_thresh_ = 0.5f;
// IoU threshold used by non-maximum suppression.
static float nms_thresh_ = 0.4f;

// Class index -> display/label-file name mapping.
static std::string label_name[] = {"person", "aeroplane", "bus", "car", "truck"};

bool PedestrianDetection::get_files_list()
{
  // Scan the dataset directory and collect every visible entry name into
  // files_list. Returns true only when the entry count matches dataset_size.
  std::string PATH = dataset_dir_;
  DIR *dir = opendir(PATH.c_str());
  // BUG FIX: opendir() was previously unchecked; readdir(nullptr) crashes.
  if(dir == nullptr)
  {
    printf("[PedestrianDetection] : Failed to open dataset dir %s \n", PATH.c_str());
    return false;
  }
  struct dirent *ptr;
  while((ptr=readdir(dir)) != nullptr)
  {
    if(ptr->d_name[0] == '.')  // skip ".", ".." and hidden files
      continue;
    files_list.push_back(ptr->d_name);
  }
  closedir(dir);
  for(const auto &name : files_list)
  {
    printf("%s\n", name.c_str());
  }
  if(files_list.size() == dataset_size)
  {
    return true;
  }
  else {
    printf("[PedestrianDetection] : Error to search dataset %ld \n", files_list.size());
    return false;
  }
}

bool PedestrianDetection::init_net_info(std::string pkg_path)
{
  // Configure the two YOLO output layers and derive buffer sizes.
  // (pkg_path is currently unused but kept for interface compatibility.)
  yolov4tn = 2;
  outputchannel = 30;  // class number change
  yolov4tnsize = {11, 15, 22, 30};  // class number change

  // Total grid cells across both output layers.
  const int cells_layer0 = yolov4tnsize[0] * yolov4tnsize[1];
  const int cells_layer1 = yolov4tnsize[2] * yolov4tnsize[3];
  yolov4tnsize_sum = cells_layer0 + cells_layer1;

  // Dequantized payload vs. raw (padded) accelerator output sizes.
  accData_size = yolov4tnsize_sum * outputchannel;
  accRaw_size  = yolov4tnsize_sum * (outputchannel + 2);

  return true;
}

void PedestrianDetection::init_fpga()
{
  // Set the detector input geometry and the physical DDR addresses/lengths
  // of the two YOLO output tensors produced by the FPGA accelerator.
  raw_img_w = 480;
  raw_img_h = 352;
  raw_img_c = 3;
  memset(&yolo_v4, 0, sizeof(struct Model_Output));
  // Output layer 0: 11x15 grid, 30 channels + 2 padding channels per cell.
  yolo_v4.addr[0] = 0x4491F800;
  yolo_v4.length[0] = 11 * 15 * (30+2);
  // Output layer 1: 22x30 grid, same per-cell layout.
  yolo_v4.addr[1] = 0x44921000;
  yolo_v4.length[1] = 22 * 30 * (30+2);
  // Map the output regions through /dev/mem; without them nothing can run.
  if(init_model_output(&yolo_v4, fd_mem) == -1)
  {
    printf("[yolov4] failed to init model output \n");
    exit(-1);
  }
}

void PedestrianDetection::read_dataset()
{
  // Producer thread: acquires frames (network camera or ZMQ subscription),
  // resizes/pads them to the FPGA input layout and queues them in ready_buf_.
  while(nh_private.ok())
  {
    // Back-pressure: cap the queue so the producer cannot outrun the FPGA.
    while(ready_buf_.size() > 50)
    {
      std::this_thread::sleep_for(std::chrono::milliseconds(5));
    }

#ifdef WITH_NETCAM
    cv::Mat origin;
    struct timeval timestamp;
    if(netcam.getCameraImage(origin, timestamp, cv::Size(1920, 1080)))
    {
      // Split the frame into a 2x2 grid; the first three tiles are
      // republished as raw JPEGs, only the last tile feeds the detector.
      std::vector<cv::Mat> pics;
      cut_img(origin, 2, 2, pics);
      for(size_t i = 0; i < pics.size(); i++)
      {
        if(i != (pics.size()-1))
        {
          std::vector<uint8_t> vec_data;
          cv::imencode(".jpg", pics[i], vec_data);
          std::string str_data;
          str_data.assign(vec_data.begin(), vec_data.end());
          zmqpp::message message;
          message << "robot" + std::to_string(i+1) +"pic" << str_data;
          socket_pubraw.send(message);
        }
        else {
          cv::Mat resize;
          // BUG FIX: interpolation is the 6th parameter of cv::resize; the
          // original passed cv::INTER_CUBIC in the fx slot, so INTER_LINEAR
          // was silently used instead.
          cv::resize(pics[i], resize, cv::Size(raw_img_w, raw_img_h), 0, 0, cv::INTER_CUBIC);
          cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);

          // Pad one pixel on top/left: accelerator input is 481x353x3
          // (see image_size in test_fpga()).
          cv::Mat padding;
          cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
          rawImgFile tmp;
          tmp.raw_img = pics[i];
          tmp.pad_img = padding;
          tmp.file_name = robot_id;
          ready_buf_mutex_.lock();
          ready_buf_.push(tmp);
          ready_buf_mutex_.unlock();
        }
      }
    }
    else {
      printf("camera no data\n");
      usleep(30*1000);
      continue;
    }
#else
    // Pull one JPEG frame off the subscription socket.
    zmqpp::message message;
    socket_sub.receive(message);
    std::string topic;
    std::string data_str;
    message >> topic >> data_str;
    std::vector<uint8_t> data_vec;
    data_vec.assign(data_str.begin(), data_str.end());

    cv::Mat img_decode = cv::imdecode(data_vec, CV_LOAD_IMAGE_COLOR);
    // Robustness: a corrupt payload yields an empty Mat; resizing it throws.
    if(img_decode.empty())
    {
      printf("[read_dataset] failed to decode incoming image\n");
      continue;
    }
    cv::Mat resize;
    // BUG FIX: same fx/interpolation argument mix-up as above.
    cv::resize(img_decode, resize, cv::Size(raw_img_w, raw_img_h), 0, 0, cv::INTER_CUBIC);
    cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);

    // Pad one pixel on top/left to reach the 481x353x3 accelerator layout.
    cv::Mat padding;
    cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    rawImgFile tmp;
    tmp.raw_img = img_decode;
    tmp.pad_img = padding;
    tmp.file_name = robot_id;
    ready_buf_mutex_.lock();
    ready_buf_.push(tmp);
    ready_buf_mutex_.unlock();
#endif

  }
}

void PedestrianDetection::test_fpga()
{
  // Worker thread: writes each padded frame into accelerator DDR, kicks the
  // run through /dev/mem-mapped control registers, waits for completion and
  // queues the raw int8 output for the decode thread.
  while(nh_private.ok())
  {
    // Sleep-poll until a frame is queued (unlocked empty() peek; assumes a
    // single consumer thread — TODO confirm).
    while(nh_private.ok())
    {
      if(ready_buf_.empty())
      {
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
      }
      else
      {
        break;
      }
    }

    rawImgFile tmp_input;
    ready_buf_mutex_.lock();
    tmp_input = ready_buf_.front();
    ready_buf_.pop();
    ready_buf_mutex_.unlock();

    // Raw accelerator output buffer; released by write_predict() after decode.
    // NOTE: plain operator new throws on failure, so the null check below is
    // only effective if this ever becomes new(std::nothrow).
    tmp_input.acc.acc_out = new int8_t[accRaw_size];
    if(!tmp_input.acc.acc_out)
    {
      ROS_ERROR("Allocation of memory Failed \n");
      return;
    }

    // Padded input is (480+1) x (352+1) x 3 — matches read_dataset()'s padding.
    int image_size = 481 * 353 * 3;
    image_to_mem(tmp_input.pad_img.data, 0x46000000, fd_mem, image_size);

    // Map the accelerator control-register page.
    void* mem_ctrl = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd_mem, 0x0400000000L);
    // BUG FIX: mmap was previously unchecked; memcpy through MAP_FAILED
    // ((void*)-1) would crash the process.
    if(mem_ctrl == MAP_FAILED)
    {
      printf("[test_fpga] mmap of control registers failed \n");
      delete [] tmp_input.acc.acc_out;
      tmp_input.acc.acc_out = nullptr;
      continue;
    }

    // Register semantics below follow the FPGA design's map — confirm
    // offsets against the hardware documentation.
    // Poll offset 124 until it reads 1 (accelerator ready).
    int state = 0;
    while(state != 1)
    {
      memcpy(&state, (char*)mem_ctrl+124, 4);
    }
    // Pulse the register at offset 0: write 1, read back, write 0, read back.
    state = 1;
    memcpy((char*)mem_ctrl, &state, 4);
    int res = -1;
    memcpy(&res, (char*)mem_ctrl, 4);

    state = 0;
    memcpy((char*)mem_ctrl, &state, 4);
    memcpy(&res, (char*)mem_ctrl, 4);

    // Busy-wait on offset 8 until it settles at 0 (computation finished).
    int busy = 0;
    memcpy(&busy, (char*)mem_ctrl+8, 4);
    int busy_prev = busy;
    while(busy_prev != 0)
    {
      memcpy(&busy, (char*)mem_ctrl+8, 4);
      if(busy != busy_prev)
      {
        busy_prev = busy;
      }
    }
    munmap(mem_ctrl, 4096);

    // Copy the two YOLO output tensors out of DDR into acc_out.
    get_model_output(&yolo_v4, tmp_input.acc.acc_out);

    result_buf_mutex_.lock();
    result_buf_.push(tmp_input);
    result_buf_mutex_.unlock();
  }
}


void PedestrianDetection::write_predict()
{
  while(nh_private.ok())
  {
    while(nh_private.ok())
    {
      if(result_buf_.empty())
      {
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
      }
      else
      {
        break;
      }
    }

    rawImgFile tmp_output;
    result_buf_mutex_.lock();
    tmp_output = result_buf_.front();
    result_buf_.pop();
    result_buf_mutex_.unlock();

    decode(tmp_output);

    delete [] tmp_output.acc.acc_out;
    tmp_output.acc.acc_out = nullptr;
  }
}

// One-shot flag: dump the first raw FPGA output to disk for offline debugging.
static bool outbin_write = true;
void PedestrianDetection::decode(rawImgFile &msg)
{
  // Post-process one frame: dequantize the raw accelerator output, decode
  // boxes + NMS, draw the detections and publish the annotated JPEG.
  cv::Mat frame = msg.raw_img;

  if(msg.acc.acc_out != nullptr)
  {
    int8_t* data = msg.acc.acc_out;

    if(outbin_write)
    {
      // BUG FIX: fp was never null-checked and never fclose()d — the handle
      // (and buffered data) leaked.
      FILE* fp = fopen((pkg_path + "/unit_test/result_img/out.bin").c_str(), "wb");
      if(fp != nullptr)
      {
        fwrite(data, sizeof(int8_t), accRaw_size, fp);
        fclose(fp);
      }
      outbin_write = false;
    }

    // Dequantize: 32 on-wire channels per cell, last 2 are padding.
    float *out = (float*)calloc(accData_size, sizeof(float));
    unpadding(data, out, 32, 2);
    std::vector<de_result> res;
    process_decode(out, res, frame);
    // NOTE(review): frame is a shallow copy sharing pixels with msg.raw_img,
    // so the second draw_objects() below re-draws the same boxes on the same
    // buffer; kept for behavioral compatibility.
    draw_objects(frame, res);

#if 1

    draw_objects(msg.raw_img, res);
    std::vector<uint8_t> vec_data;
    cv::imencode(".jpg", msg.raw_img, vec_data);
    std::string str_data;
    str_data.assign(vec_data.begin(), vec_data.end());
    zmqpp::message message;
    message << "pedestrian_detect" << str_data;
    socket_pub.send(message);
//    cv::imwrite((pkg_path + "/unit_test/result_img/" + img_name +"jpg"), msg.raw_img);

/*
    std::ofstream outfile;
    outfile.open((pkg_path + "/unit_test/result_label/" + img_name +"txt"));
    for(size_t i = 0; i < res.size(); i++)
    {
      std::string str_conf = std::to_string(res[i].complex_prob);
      std::string conf_4(str_conf.begin(), str_conf.begin()+5);
      outfile << label_name[res[i].index] << " " << conf_4 << " " << static_cast<int>(res[i].x - res[i].w/2) << " " << static_cast<int>(res[i].y - res[i].h/2) << " " << static_cast<int>(res[i].x + res[i].w/2) << " " << static_cast<int>(res[i].y + res[i].h/2);
      if(i != (res.size()-1))
        outfile << std::endl;
    }
    outfile.close();*/
/*
    FILE* fp = fopen((pkg_path + "/unit_test/result_bin/" + img_name +"bin").c_str(), "wb");
    fwrite(data, sizeof(int8_t), 11 * 15 * 96 + 22 * 30 * 96, fp);
    fclose(fp);
    */
#endif

    free(out);
  }
  else
  {
    printf("acc is empty \n");
  }

}


void PedestrianDetection::unpadding(int8_t* input, float* output, int ic ,int padding)
{
  // Strip the per-cell channel padding and dequantize the int8 accelerator
  // output to float. Each output layer has its own zero-point and scale
  // (presumably from the quantization calibration — confirm with model).
  const int layer0_cells = yolov4tnsize[0] * yolov4tnsize[1];
  const int valid = ic - padding;

  for(int cell = 0; cell < yolov4tnsize_sum; cell++, input += ic)
  {
    const bool in_layer0 = cell < layer0_cells;
    const int zero_point = in_layer0 ? 78 : 88;
    const float scale = in_layer0 ? 0.1175554f : 0.1006139f;

    for(int ch = 0; ch < valid; ch++)
    {
      *output++ = (static_cast<int>(input[ch]) - zero_point) * scale;
    }
  }
}

inline float PedestrianDetection::sigmoid(const float &x)
{
  // Logistic function: maps any real x into (0, 1).
  const float e = expf(-x);
  return 1.0f / (1.0f + e);
}

inline int PedestrianDetection::find_max_index(float *buf, int len)
{
  // Index of the largest element in buf[0..len); the first occurrence wins.
  int best = 0;
  for (int i = 1; i < len; ++i)
  {
    if (buf[i] > buf[best])
      best = i;
  }
  return best;
}

// Sort predicate: descending combined confidence (objectness * class prob).
bool comp_prob(const de_result &a, const de_result &b)
{
  return b.complex_prob < a.complex_prob;
}

// Sort predicate: descending class index.
bool comp(const de_result &a, const de_result &b)
{
  return b.index < a.index;
}

inline float PedestrianDetection::clac_iou(const de_result &A, const de_result &B)
{
  float aminx = A.x - A.w / 2.f;
  float amaxx = A.x + A.w / 2.f;
  float aminy = A.y - A.h / 2.f;
  float amaxy = A.y + A.h / 2.f;

  float bminx = B.x - B.w / 2.f;
  float bmaxx = B.x + B.w / 2.f;
  float bminy = B.y - B.h / 2.f;
  float bmaxy = B.y + B.h / 2.f;

  float w = std::min(amaxx, bmaxx) - std::max(aminx, bminx);
  float h = std::min(amaxy, bmaxy) - std::max(aminy, bminy);

  if (w <= 0 || h <= 0)
    return 0;

  return (w * h) / ((A.w * A.h) + (B.w * B.h) - (w * h));
}

void PedestrianDetection::nms(const std::vector<de_result> &vpbox, std::vector<de_result> &voutbox, float iou_thres)
{
  // Greedy per-class non-maximum suppression. vpbox is expected to arrive
  // sorted by descending confidence (see process_decode); survivors are
  // appended to voutbox in that order.
  const int n = static_cast<int>(vpbox.size());
  // keep[i] holds the box's class index, or -1 once the box is suppressed.
  std::vector<float> keep(n, -1.0);
  for (int i = 0; i < n; i++)
    keep[i] = vpbox[i].index;

  for (int i = 0; i < n; i++)
  {
    if (keep[i] == -1.0)
      continue;
    for (int j = i + 1; j < n; j++)
    {
      const bool suppressed = keep[j] == -1.0;
      const bool other_class = vpbox[i].index != vpbox[j].index;
      if (suppressed || other_class)
        continue;
      // Lower-confidence box overlapping a kept one too much is dropped.
      if (clac_iou(vpbox[i], vpbox[j]) > iou_thres)
        keep[j] = -1.0;
    }
  }

  for (int i = 0; i < n; i++)
  {
    if (keep[i] != -1.0)
      voutbox.push_back(vpbox[i]);
  }
}

void PedestrianDetection::decode_one_with_thre(int layer, float *anchors, void *data, const int xno, const int yno, const int anchor_no, std::vector<de_result>& boxes, cv::Mat &img)
{
  // Decode one anchor prediction at one grid cell and append it to `boxes`
  // only if its combined confidence clears conf_thresh_.
  const float *ori_ptr = (const float *)data;
  // BUG FIX: `length` is a runtime variable, so `float decode_data[length]`
  // was a variable-length array — a compiler extension, not standard C++.
  std::vector<float> decode_data(length);
  float *data_ptr = decode_data.data();

  // Activate objectness and class scores first so we can early-out before
  // paying for the box-geometry decode.
  data_ptr[4] = sigmoid(ori_ptr[4]);
  for(int k = 5; k < length; k++)
  {
    data_ptr[k] = sigmoid(ori_ptr[k]);
  }
  int max_index = find_max_index(data_ptr+5, kind_nums);
  if(data_ptr[max_index+5] * data_ptr[4] < conf_thresh_)
    return;

  // YOLO decode: sigmoid for the cell-relative center offsets, exp for the
  // anchor-relative width/height factors.
  data_ptr[0] = sigmoid(ori_ptr[0]);
  data_ptr[1] = sigmoid(ori_ptr[1]);
  data_ptr[2] = expf(ori_ptr[2]);
  data_ptr[3] = expf(ori_ptr[3]);

  de_result tmp;
  // yolov4tnsize = {rows0, cols0, rows1, cols1}; scale grid units to pixels.
  tmp.x = (data_ptr[0] + xno) / yolov4tnsize[2 * layer + 1] * img.cols;
  tmp.y = (data_ptr[1] + yno) / yolov4tnsize[2 * layer] * img.rows;
  tmp.w = (((data_ptr[2]) * anchors[2 * anchor_no])) / yolov4tnsize[2 * layer + 1] * img.cols;
  tmp.h = (((data_ptr[3]) * anchors[2 * anchor_no + 1])) / yolov4tnsize[2 * layer] * img.rows;
  tmp.conf = data_ptr[4];
  tmp.index = max_index;
  tmp.prob = data_ptr[max_index+5];
  tmp.complex_prob = data_ptr[max_index+5] * data_ptr[4];
  boxes.push_back(tmp);
}

void PedestrianDetection::process_decode(float* input, std::vector<de_result> &res, cv::Mat &img)
{
  std::vector<de_result> boxes;
  float* data_ptr = input;

  for(int i = 0; i < yolov4tnsize[0]; i++){
    for(int j = 0; j < yolov4tnsize[1]; j++){
      for(int k = 0; k < 3; k++)
      {
        decode_one_with_thre(0, anchors_0, data_ptr, j, i, k, boxes, img);
        data_ptr += length;
      }
    }
  }

  for(int i = 0; i < yolov4tnsize[2]; i++){
    for(int j = 0; j < yolov4tnsize[3]; j++){
      for(int k = 0; k < 3; k++)
      {
        decode_one_with_thre(1, anchors_1, data_ptr, j, i, k, boxes, img);
        data_ptr += length;
      }
    }
  }
  sort(boxes.begin(), boxes.end(), comp_prob);
  nms(boxes, res, nms_thresh_);
  boxes.clear();
}


void PedestrianDetection::draw_objects(cv::Mat &image, const std::vector<de_result> &objects)
{
  // Render each detection as a green rectangle with a "label conf%" banner
  // above it. Uses the file-scope `text` scratch buffer (not thread-safe).
  double font_size = 0.6;
  int font_bold = 1;
  int baseLine = 0;
  for(size_t i = 0; i < objects.size(); i++)
  {
    const de_result &obj = objects[i];

    // BUG FIX: snprintf instead of sprintf — `text` is a fixed 256-byte
    // shared buffer and sprintf performs no bounds checking.
    snprintf(text, sizeof(text), "%s %.1f%%", label_name[obj.index].c_str(), obj.complex_prob * 100);
    // Convert center/size form to a top-left/bottom-right box clamped to the image.
    int x = std::max(static_cast<int>(obj.x - obj.w / 2), 0);
    int y = std::max(static_cast<int>(obj.y - obj.h / 2), 0);
    int x1 = std::min(static_cast<int>(obj.x + obj.w / 2), image.cols);
    int y1 = std::min(static_cast<int>(obj.y + obj.h / 2), image.rows);
    int w = x1 - x - 1;
    int h = y1 - y - 1;
    if(w <= 0 || h <= 0)  // degenerate box after clamping — nothing to draw
      continue;

    cv::Rect obj_rect(x, y, w, h);
    cv::rectangle(image, obj_rect, cv::Scalar(0, 255, 0), font_bold);
    cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, font_size, font_bold, &baseLine);

    // Place the filled label banner just above the box, clamped to the top edge.
    int tx = obj_rect.x;
    int ty = obj_rect.y - label_size.height - baseLine;
    if (ty < 0)
      ty = 0;

    cv::Point point;
    point = cv::Point(tx, ty + label_size.height);
    cv::rectangle(image, cv::Rect(cv::Point(obj_rect.x, ty), cv::Size(obj_rect.width, label_size.height + baseLine)), cv::Scalar(128, 128, 0), CV_FILLED);
    cv::putText(image, text, point, cv::FONT_HERSHEY_SIMPLEX, font_size, cv::Scalar(0, 255, 0), font_bold);
  }
}

void PedestrianDetection::cut_img(cv::Mat &src, int m, int n, std::vector<cv::Mat> &ceil)
{
  // Split src into an m-row by n-column grid of equal tiles (row-major) and
  // append each ROI to `ceil`. The ROIs share pixel storage with src.
  const int tile_h = src.rows / m;
  const int tile_w = src.cols / n;
  for(int row = 0; row < m; row++)
  {
    for(int col = 0; col < n; col++)
    {
      cv::Rect region(col * tile_w, row * tile_h, tile_w, tile_h);
      ceil.push_back(cv::Mat(src, region));
    }
  }
}

추천

출처: blog.csdn.net/weixin_39538031/article/details/131001772