|
@@ -0,0 +1,319 @@
|
|
|
+//
|
|
|
+// Created by ubuntu on 3/16/23.
|
|
|
+//
|
|
|
+
|
|
|
+#include "yolov8-seg.h"
|
|
|
+
|
|
|
+
|
|
|
+using namespace seg;
|
|
|
+
|
|
|
+YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)
|
|
|
+{
|
|
|
+ std::ifstream file(engine_file_path, std::ios::binary);
|
|
|
+ assert(file.good());
|
|
|
+ file.seekg(0, std::ios::end);
|
|
|
+ auto size = file.tellg();
|
|
|
+ file.seekg(0, std::ios::beg);
|
|
|
+ char* trtModelStream = new char[size];
|
|
|
+ assert(trtModelStream);
|
|
|
+ file.read(trtModelStream, size);
|
|
|
+ file.close();
|
|
|
+ initLibNvInferPlugins(&this->gLogger, "");
|
|
|
+ this->runtime = nvinfer1::createInferRuntime(this->gLogger);
|
|
|
+ assert(this->runtime != nullptr);
|
|
|
+
|
|
|
+ this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
|
|
|
+ assert(this->engine != nullptr);
|
|
|
+ delete[] trtModelStream;
|
|
|
+ this->context = this->engine->createExecutionContext();
|
|
|
+
|
|
|
+ assert(this->context != nullptr);
|
|
|
+ cudaStreamCreate(&this->stream);
|
|
|
+ this->num_bindings = this->engine->getNbBindings();
|
|
|
+
|
|
|
+ for (int i = 0; i < this->num_bindings; ++i) {
|
|
|
+ Binding binding;
|
|
|
+ nvinfer1::Dims dims;
|
|
|
+ nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
|
|
|
+ std::string name = this->engine->getBindingName(i);
|
|
|
+ binding.name = name;
|
|
|
+ binding.dsize = type_to_size(dtype);
|
|
|
+
|
|
|
+ bool IsInput = engine->bindingIsInput(i);
|
|
|
+ if (IsInput) {
|
|
|
+ this->num_inputs += 1;
|
|
|
+ dims = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
|
|
|
+ binding.size = get_size_by_dims(dims);
|
|
|
+ binding.dims = dims;
|
|
|
+ this->input_bindings.push_back(binding);
|
|
|
+ // set max opt shape
|
|
|
+ this->context->setBindingDimensions(i, dims);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ dims = this->context->getBindingDimensions(i);
|
|
|
+ binding.size = get_size_by_dims(dims);
|
|
|
+ binding.dims = dims;
|
|
|
+ this->output_bindings.push_back(binding);
|
|
|
+ this->num_outputs += 1;
|
|
|
+ }
|
|
|
+ // printf("name: %s, size: %ld, dims: %d %d %d %d %d\n",
|
|
|
+ // name.c_str(), binding.dsize, dims.nbDims, dims.d[0], dims.d[1], dims.d[2], dims.d[3]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+YOLOv8_seg::~YOLOv8_seg()
|
|
|
+{
|
|
|
+ this->context->destroy();
|
|
|
+ this->engine->destroy();
|
|
|
+ this->runtime->destroy();
|
|
|
+ cudaStreamDestroy(this->stream);
|
|
|
+ for (auto& ptr : this->device_ptrs) {
|
|
|
+ CHECK(cudaFree(ptr));
|
|
|
+ }
|
|
|
+
|
|
|
+ for (auto& ptr : this->host_ptrs) {
|
|
|
+ CHECK(cudaFreeHost(ptr));
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::make_pipe(bool warmup)
|
|
|
+{
|
|
|
+
|
|
|
+ for (auto& bindings : this->input_bindings) {
|
|
|
+ void* d_ptr;
|
|
|
+ CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
|
|
|
+ this->device_ptrs.push_back(d_ptr);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (auto& bindings : this->output_bindings) {
|
|
|
+ void * d_ptr, *h_ptr;
|
|
|
+ size_t size = bindings.size * bindings.dsize;
|
|
|
+ CHECK(cudaMalloc(&d_ptr, size));
|
|
|
+ CHECK(cudaHostAlloc(&h_ptr, size, 0));
|
|
|
+ this->device_ptrs.push_back(d_ptr);
|
|
|
+ this->host_ptrs.push_back(h_ptr);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (warmup) {
|
|
|
+ for (int i = 0; i < 10; i++) {
|
|
|
+ for (auto& bindings : this->input_bindings) {
|
|
|
+ size_t size = bindings.size * bindings.dsize;
|
|
|
+ void* h_ptr = malloc(size);
|
|
|
+ memset(h_ptr, 0, size);
|
|
|
+ CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
|
|
|
+ free(h_ptr);
|
|
|
+ }
|
|
|
+ this->infer();
|
|
|
+ }
|
|
|
+ printf("model warmup 10 times\n");
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
|
|
|
+{
|
|
|
+ const float inp_h = size.height;
|
|
|
+ const float inp_w = size.width;
|
|
|
+ float height = image.rows;
|
|
|
+ float width = image.cols;
|
|
|
+
|
|
|
+ float r = std::min(inp_h / height, inp_w / width);
|
|
|
+ int padw = std::round(width * r);
|
|
|
+ int padh = std::round(height * r);
|
|
|
+
|
|
|
+ cv::Mat tmp;
|
|
|
+ if ((int)width != padw || (int)height != padh) {
|
|
|
+ cv::resize(image, tmp, cv::Size(padw, padh));
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ tmp = image.clone();
|
|
|
+ }
|
|
|
+
|
|
|
+ float dw = inp_w - padw;
|
|
|
+ float dh = inp_h - padh;
|
|
|
+
|
|
|
+ dw /= 2.0f;
|
|
|
+ dh /= 2.0f;
|
|
|
+ int top = int(std::round(dh - 0.1f));
|
|
|
+ int bottom = int(std::round(dh + 0.1f));
|
|
|
+ int left = int(std::round(dw - 0.1f));
|
|
|
+ int right = int(std::round(dw + 0.1f));
|
|
|
+
|
|
|
+ cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
|
|
|
+
|
|
|
+ cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
|
|
|
+ this->pparam.ratio = 1 / r;
|
|
|
+ this->pparam.dw = dw;
|
|
|
+ this->pparam.dh = dh;
|
|
|
+ this->pparam.height = height;
|
|
|
+ this->pparam.width = width;
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
|
|
|
+{
|
|
|
+ cv::Mat nchw;
|
|
|
+ auto& in_binding = this->input_bindings[0];
|
|
|
+ auto width = in_binding.dims.d[3];
|
|
|
+ auto height = in_binding.dims.d[2];
|
|
|
+ cv::Size size{width, height};
|
|
|
+ this->letterbox(image, nchw, size);
|
|
|
+
|
|
|
+ this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
|
|
|
+
|
|
|
+ CHECK(cudaMemcpyAsync(
|
|
|
+ this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size)
|
|
|
+{
|
|
|
+ cv::Mat nchw;
|
|
|
+ this->letterbox(image, nchw, size);
|
|
|
+ this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
|
|
|
+ CHECK(cudaMemcpyAsync(
|
|
|
+ this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::infer()
|
|
|
+{
|
|
|
+
|
|
|
+ this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
|
|
|
+ for (int i = 0; i < this->num_outputs; i++) {
|
|
|
+ size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
|
|
|
+ CHECK(cudaMemcpyAsync(
|
|
|
+ this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
|
|
|
+ }
|
|
|
+ cudaStreamSynchronize(this->stream);
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::postprocess(
|
|
|
+ std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int seg_channels, int seg_h, int seg_w)
|
|
|
+{
|
|
|
+ objs.clear();
|
|
|
+ auto input_h = this->input_bindings[0].dims.d[2];
|
|
|
+ auto input_w = this->input_bindings[0].dims.d[3];
|
|
|
+ auto num_anchors = this->output_bindings[0].dims.d[1];
|
|
|
+ auto num_channels = this->output_bindings[0].dims.d[2];
|
|
|
+
|
|
|
+ auto& dw = this->pparam.dw;
|
|
|
+ auto& dh = this->pparam.dh;
|
|
|
+ auto& width = this->pparam.width;
|
|
|
+ auto& height = this->pparam.height;
|
|
|
+ auto& ratio = this->pparam.ratio;
|
|
|
+
|
|
|
+ auto* output = static_cast<float*>(this->host_ptrs[0]);
|
|
|
+ cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F, static_cast<float*>(this->host_ptrs[1]));
|
|
|
+
|
|
|
+ std::vector<int> labels;
|
|
|
+ std::vector<float> scores;
|
|
|
+ std::vector<cv::Rect> bboxes;
|
|
|
+ std::vector<cv::Mat> mask_confs;
|
|
|
+ std::vector<int> indices;
|
|
|
+
|
|
|
+ for (int i = 0; i < num_anchors; i++) {
|
|
|
+ float* ptr = output + i * num_channels;
|
|
|
+ float score = *(ptr + 4);
|
|
|
+
|
|
|
+ /*if (score > score_thres) {
|
|
|
+ printf("num_channels: %d, score: %f\n", num_channels, score);
|
|
|
+ }*/
|
|
|
+
|
|
|
+ if (score > score_thres) {
|
|
|
+ float x0 = *ptr++ - dw;
|
|
|
+ float y0 = *ptr++ - dh;
|
|
|
+ float x1 = *ptr++ - dw;
|
|
|
+ float y1 = *ptr++ - dh;
|
|
|
+
|
|
|
+ x0 = clamp(x0 * ratio, 0.f, width);
|
|
|
+ y0 = clamp(y0 * ratio, 0.f, height);
|
|
|
+ x1 = clamp(x1 * ratio, 0.f, width);
|
|
|
+ y1 = clamp(y1 * ratio, 0.f, height);
|
|
|
+
|
|
|
+ int label = *(++ptr);
|
|
|
+ cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
|
|
|
+ mask_confs.push_back(mask_conf);
|
|
|
+ labels.push_back(label);
|
|
|
+ scores.push_back(score);
|
|
|
+ bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+#if defined(BATCHED_NMS)
|
|
|
+ cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
|
|
|
+#else
|
|
|
+ cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
|
|
|
+#endif
|
|
|
+
|
|
|
+ cv::Mat masks;
|
|
|
+ int cnt = 0;
|
|
|
+ for (auto& i : indices) {
|
|
|
+ if (cnt >= topk) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ cv::Rect tmp = bboxes[i];
|
|
|
+ Object obj;
|
|
|
+ obj.label = labels[i];
|
|
|
+ obj.rect = tmp;
|
|
|
+ obj.prob = scores[i];
|
|
|
+ masks.push_back(mask_confs[i]);
|
|
|
+ objs.push_back(obj);
|
|
|
+ cnt += 1;
|
|
|
+ }
|
|
|
+ if (masks.empty()) {
|
|
|
+ // masks is empty
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ cv::Mat matmulRes = (masks * protos).t();
|
|
|
+ cv::Mat maskMat = matmulRes.reshape(indices.size(), {seg_h, seg_w});
|
|
|
+
|
|
|
+ std::vector<cv::Mat> maskChannels;
|
|
|
+ cv::split(maskMat, maskChannels);
|
|
|
+ int scale_dw = dw / input_w * seg_w;
|
|
|
+ int scale_dh = dh / input_h * seg_h;
|
|
|
+
|
|
|
+ cv::Rect roi(scale_dw, scale_dh, seg_w - 2 * scale_dw, seg_h - 2 * scale_dh);
|
|
|
+
|
|
|
+ for (int i = 0; i < indices.size(); i++) {
|
|
|
+ cv::Mat dest, mask;
|
|
|
+ cv::exp(-maskChannels[i], dest);
|
|
|
+ dest = 1.0 / (1.0 + dest);
|
|
|
+ dest = dest(roi);
|
|
|
+ cv::resize(dest, mask, cv::Size((int)width, (int)height), cv::INTER_LINEAR);
|
|
|
+ objs[i].boxMask = mask(objs[i].rect) > 0.5f;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void YOLOv8_seg::draw_objects(const cv::Mat& image,
|
|
|
+ cv::Mat& res,
|
|
|
+ const std::vector<Object>& objs,
|
|
|
+ const std::vector<std::string>& CLASS_NAMES,
|
|
|
+ const std::vector<std::vector<unsigned int>>& COLORS,
|
|
|
+ const std::vector<std::vector<unsigned int>>& MASK_COLORS)
|
|
|
+{
|
|
|
+ res = image.clone();
|
|
|
+ cv::Mat mask = image.clone();
|
|
|
+ for (auto& obj : objs) {
|
|
|
+ int idx = obj.label;
|
|
|
+ cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]);
|
|
|
+ cv::Scalar mask_color =
|
|
|
+ cv::Scalar(MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]);
|
|
|
+ cv::rectangle(res, obj.rect, color, 2);
|
|
|
+
|
|
|
+ char text[256];
|
|
|
+ sprintf(text, "%s %.1f%%", CLASS_NAMES[idx].c_str(), obj.prob * 100);
|
|
|
+ mask(obj.rect).setTo(mask_color, obj.boxMask);
|
|
|
+
|
|
|
+ int baseLine = 0;
|
|
|
+ cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
|
|
|
+
|
|
|
+ int x = (int)obj.rect.x;
|
|
|
+ int y = (int)obj.rect.y + 1;
|
|
|
+
|
|
|
+ if (y > res.rows)
|
|
|
+ y = res.rows;
|
|
|
+
|
|
|
+ cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
|
|
|
+
|
|
|
+ cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
|
|
|
+ }
|
|
|
+ cv::addWeighted(res, 0.5, mask, 0.8, 1, res);
|
|
|
+}
|