yolov8-seg.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. //
  2. // Created by ubuntu on 3/16/23.
  3. //
  4. #include "yolov8-seg.h"
  5. using namespace seg;
  6. YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)
  7. {
  8. std::ifstream file(engine_file_path, std::ios::binary);
  9. assert(file.good());
  10. file.seekg(0, std::ios::end);
  11. auto size = file.tellg();
  12. file.seekg(0, std::ios::beg);
  13. char* trtModelStream = new char[size];
  14. assert(trtModelStream);
  15. file.read(trtModelStream, size);
  16. file.close();
  17. initLibNvInferPlugins(&this->gLogger, "");
  18. this->runtime = nvinfer1::createInferRuntime(this->gLogger);
  19. assert(this->runtime != nullptr);
  20. this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
  21. assert(this->engine != nullptr);
  22. delete[] trtModelStream;
  23. this->context = this->engine->createExecutionContext();
  24. assert(this->context != nullptr);
  25. cudaStreamCreate(&this->stream);
  26. this->num_bindings = this->engine->getNbBindings();
  27. for (int i = 0; i < this->num_bindings; ++i) {
  28. Binding binding;
  29. nvinfer1::Dims dims;
  30. nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
  31. std::string name = this->engine->getBindingName(i);
  32. binding.name = name;
  33. binding.dsize = type_to_size(dtype);
  34. bool IsInput = engine->bindingIsInput(i);
  35. if (IsInput) {
  36. this->num_inputs += 1;
  37. dims = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
  38. binding.size = get_size_by_dims(dims);
  39. binding.dims = dims;
  40. this->input_bindings.push_back(binding);
  41. // set max opt shape
  42. this->context->setBindingDimensions(i, dims);
  43. }
  44. else {
  45. dims = this->context->getBindingDimensions(i);
  46. binding.size = get_size_by_dims(dims);
  47. binding.dims = dims;
  48. this->output_bindings.push_back(binding);
  49. this->num_outputs += 1;
  50. }
  51. // printf("name: %s, size: %ld, dims: %d %d %d %d %d\n",
  52. // name.c_str(), binding.dsize, dims.nbDims, dims.d[0], dims.d[1], dims.d[2], dims.d[3]);
  53. }
  54. }
  55. YOLOv8_seg::~YOLOv8_seg()
  56. {
  57. this->context->destroy();
  58. this->engine->destroy();
  59. this->runtime->destroy();
  60. cudaStreamDestroy(this->stream);
  61. for (auto& ptr : this->device_ptrs) {
  62. CHECK(cudaFree(ptr));
  63. }
  64. for (auto& ptr : this->host_ptrs) {
  65. CHECK(cudaFreeHost(ptr));
  66. }
  67. }
  68. void YOLOv8_seg::make_pipe(bool warmup)
  69. {
  70. for (auto& bindings : this->input_bindings) {
  71. void* d_ptr;
  72. CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
  73. this->device_ptrs.push_back(d_ptr);
  74. }
  75. for (auto& bindings : this->output_bindings) {
  76. void * d_ptr, *h_ptr;
  77. size_t size = bindings.size * bindings.dsize;
  78. CHECK(cudaMalloc(&d_ptr, size));
  79. CHECK(cudaHostAlloc(&h_ptr, size, 0));
  80. this->device_ptrs.push_back(d_ptr);
  81. this->host_ptrs.push_back(h_ptr);
  82. }
  83. if (warmup) {
  84. for (int i = 0; i < 10; i++) {
  85. for (auto& bindings : this->input_bindings) {
  86. size_t size = bindings.size * bindings.dsize;
  87. void* h_ptr = malloc(size);
  88. memset(h_ptr, 0, size);
  89. CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
  90. free(h_ptr);
  91. }
  92. this->infer();
  93. }
  94. printf("model warmup 10 times\n");
  95. }
  96. }
  97. void YOLOv8_seg::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
  98. {
  99. const float inp_h = size.height;
  100. const float inp_w = size.width;
  101. float height = image.rows;
  102. float width = image.cols;
  103. float r = std::min(inp_h / height, inp_w / width);
  104. int padw = std::round(width * r);
  105. int padh = std::round(height * r);
  106. cv::Mat tmp;
  107. if ((int)width != padw || (int)height != padh) {
  108. cv::resize(image, tmp, cv::Size(padw, padh));
  109. }
  110. else {
  111. tmp = image.clone();
  112. }
  113. float dw = inp_w - padw;
  114. float dh = inp_h - padh;
  115. dw /= 2.0f;
  116. dh /= 2.0f;
  117. int top = int(std::round(dh - 0.1f));
  118. int bottom = int(std::round(dh + 0.1f));
  119. int left = int(std::round(dw - 0.1f));
  120. int right = int(std::round(dw + 0.1f));
  121. cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
  122. cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
  123. this->pparam.ratio = 1 / r;
  124. this->pparam.dw = dw;
  125. this->pparam.dh = dh;
  126. this->pparam.height = height;
  127. this->pparam.width = width;
  128. }
  129. void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
  130. {
  131. cv::Mat nchw;
  132. auto& in_binding = this->input_bindings[0];
  133. auto width = in_binding.dims.d[3];
  134. auto height = in_binding.dims.d[2];
  135. cv::Size size{width, height};
  136. this->letterbox(image, nchw, size);
  137. this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
  138. CHECK(cudaMemcpyAsync(
  139. this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
  140. }
  141. void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size)
  142. {
  143. cv::Mat nchw;
  144. this->letterbox(image, nchw, size);
  145. this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
  146. CHECK(cudaMemcpyAsync(
  147. this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
  148. }
  149. void YOLOv8_seg::infer()
  150. {
  151. this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
  152. for (int i = 0; i < this->num_outputs; i++) {
  153. size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
  154. CHECK(cudaMemcpyAsync(
  155. this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
  156. }
  157. cudaStreamSynchronize(this->stream);
  158. }
  159. void YOLOv8_seg::postprocess(
  160. std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int seg_channels, int seg_h, int seg_w)
  161. {
  162. objs.clear();
  163. auto input_h = this->input_bindings[0].dims.d[2];
  164. auto input_w = this->input_bindings[0].dims.d[3];
  165. auto num_anchors = this->output_bindings[0].dims.d[1];
  166. auto num_channels = this->output_bindings[0].dims.d[2];
  167. auto& dw = this->pparam.dw;
  168. auto& dh = this->pparam.dh;
  169. auto& width = this->pparam.width;
  170. auto& height = this->pparam.height;
  171. auto& ratio = this->pparam.ratio;
  172. auto* output = static_cast<float*>(this->host_ptrs[0]);
  173. cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F, static_cast<float*>(this->host_ptrs[1]));
  174. std::vector<int> labels;
  175. std::vector<float> scores;
  176. std::vector<cv::Rect> bboxes;
  177. std::vector<cv::Mat> mask_confs;
  178. std::vector<int> indices;
  179. for (int i = 0; i < num_anchors; i++) {
  180. float* ptr = output + i * num_channels;
  181. float score = *(ptr + 4);
  182. /*if (score > score_thres) {
  183. printf("num_channels: %d, score: %f\n", num_channels, score);
  184. }*/
  185. if (score > score_thres) {
  186. float x0 = *ptr++ - dw;
  187. float y0 = *ptr++ - dh;
  188. float x1 = *ptr++ - dw;
  189. float y1 = *ptr++ - dh;
  190. x0 = clamp(x0 * ratio, 0.f, width);
  191. y0 = clamp(y0 * ratio, 0.f, height);
  192. x1 = clamp(x1 * ratio, 0.f, width);
  193. y1 = clamp(y1 * ratio, 0.f, height);
  194. int label = *(++ptr);
  195. cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
  196. mask_confs.push_back(mask_conf);
  197. labels.push_back(label);
  198. scores.push_back(score);
  199. bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
  200. }
  201. }
  202. #if defined(BATCHED_NMS)
  203. cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
  204. #else
  205. cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
  206. #endif
  207. cv::Mat masks;
  208. int cnt = 0;
  209. for (auto& i : indices) {
  210. if (cnt >= topk) {
  211. break;
  212. }
  213. cv::Rect tmp = bboxes[i];
  214. Object obj;
  215. obj.label = labels[i];
  216. obj.rect = tmp;
  217. obj.prob = scores[i];
  218. masks.push_back(mask_confs[i]);
  219. objs.push_back(obj);
  220. cnt += 1;
  221. }
  222. if (masks.empty()) {
  223. // masks is empty
  224. }
  225. else {
  226. cv::Mat matmulRes = (masks * protos).t();
  227. cv::Mat maskMat = matmulRes.reshape(indices.size(), {seg_h, seg_w});
  228. std::vector<cv::Mat> maskChannels;
  229. cv::split(maskMat, maskChannels);
  230. int scale_dw = dw / input_w * seg_w;
  231. int scale_dh = dh / input_h * seg_h;
  232. cv::Rect roi(scale_dw, scale_dh, seg_w - 2 * scale_dw, seg_h - 2 * scale_dh);
  233. for (int i = 0; i < indices.size(); i++) {
  234. cv::Mat dest, mask;
  235. cv::exp(-maskChannels[i], dest);
  236. dest = 1.0 / (1.0 + dest);
  237. dest = dest(roi);
  238. cv::resize(dest, mask, cv::Size((int)width, (int)height), cv::INTER_LINEAR);
  239. objs[i].boxMask = mask(objs[i].rect) > 0.5f;
  240. }
  241. }
  242. }
  243. void YOLOv8_seg::draw_objects(const cv::Mat& image,
  244. cv::Mat& res,
  245. const std::vector<Object>& objs,
  246. const std::vector<std::string>& CLASS_NAMES,
  247. const std::vector<std::vector<unsigned int>>& COLORS,
  248. const std::vector<std::vector<unsigned int>>& MASK_COLORS)
  249. {
  250. res = image.clone();
  251. cv::Mat mask = image.clone();
  252. for (auto& obj : objs) {
  253. int idx = obj.label;
  254. cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]);
  255. cv::Scalar mask_color =
  256. cv::Scalar(MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]);
  257. cv::rectangle(res, obj.rect, color, 2);
  258. char text[256];
  259. sprintf(text, "%s %.1f%%", CLASS_NAMES[idx].c_str(), obj.prob * 100);
  260. mask(obj.rect).setTo(mask_color, obj.boxMask);
  261. int baseLine = 0;
  262. cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
  263. int x = (int)obj.rect.x;
  264. int y = (int)obj.rect.y + 1;
  265. if (y > res.rows)
  266. y = res.rows;
  267. cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
  268. cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
  269. }
  270. cv::addWeighted(res, 0.5, mask, 0.8, 1, res);
  271. }