12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822 |
- // This file is part of OpenCV project.
- // It is subject to the license terms in the LICENSE file found in the top-level directory
- // of this distribution and at http://opencv.org/license.html.
- #include "test_precomp.hpp"
- #include <opencv2/dnn/shape_utils.hpp>
- #include "npy_blob.hpp"
- namespace opencv_test { namespace {
- template<typename TString>
- static std::string _tf(TString filename, bool required = true)
- {
- String rootFolder = "dnn/";
- return findDataFile(rootFolder + filename, required);
- }
- class Test_Model : public DNNTestLayer
- {
- public:
- void testDetectModel(const std::string& weights, const std::string& cfg,
- const std::string& imgPath, const std::vector<int>& refClassIds,
- const std::vector<float>& refConfidences,
- const std::vector<Rect2d>& refBoxes,
- double scoreDiff, double iouDiff,
- double confThreshold = 0.24, double nmsThreshold = 0.0,
- const Size& size = {-1, -1}, Scalar mean = Scalar(),
- double scale = 1.0, bool swapRB = false, bool crop = false,
- bool nmsAcrossClasses = false)
- {
- checkBackend();
- Mat frame = imread(imgPath);
- DetectionModel model(weights, cfg);
- model.setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- model.setPreferableBackend(backend);
- model.setPreferableTarget(target);
- model.setNmsAcrossClasses(nmsAcrossClasses);
- std::vector<int> classIds;
- std::vector<float> confidences;
- std::vector<Rect> boxes;
- model.detect(frame, classIds, confidences, boxes, confThreshold, nmsThreshold);
- std::vector<Rect2d> boxesDouble(boxes.size());
- for (int i = 0; i < boxes.size(); i++) {
- boxesDouble[i] = boxes[i];
- }
- normAssertDetections(refClassIds, refConfidences, refBoxes, classIds,
- confidences, boxesDouble, "",
- confThreshold, scoreDiff, iouDiff);
- }
- void testClassifyModel(const std::string& weights, const std::string& cfg,
- const std::string& imgPath, std::pair<int, float> ref, float norm,
- const Size& size = {-1, -1}, Scalar mean = Scalar(),
- double scale = 1.0, bool swapRB = false, bool crop = false)
- {
- checkBackend();
- Mat frame = imread(imgPath);
- ClassificationModel model(weights, cfg);
- model.setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- std::pair<int, float> prediction = model.classify(frame);
- EXPECT_EQ(prediction.first, ref.first);
- ASSERT_NEAR(prediction.second, ref.second, norm);
- }
- void testKeypointsModel(const std::string& weights, const std::string& cfg,
- const Mat& frame, const Mat& exp, float norm,
- const Size& size = {-1, -1}, Scalar mean = Scalar(),
- double scale = 1.0, bool swapRB = false, bool crop = false)
- {
- checkBackend();
- std::vector<Point2f> points;
- KeypointsModel model(weights, cfg);
- model.setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- model.setPreferableBackend(backend);
- model.setPreferableTarget(target);
- points = model.estimate(frame, 0.5);
- Mat out = Mat(points).reshape(1);
- normAssert(exp, out, "", norm, norm);
- }
- void testSegmentationModel(const std::string& weights_file, const std::string& config_file,
- const std::string& inImgPath, const std::string& outImgPath,
- float norm, const Size& size = {-1, -1}, Scalar mean = Scalar(),
- double scale = 1.0, bool swapRB = false, bool crop = false)
- {
- checkBackend();
- Mat frame = imread(inImgPath);
- Mat mask;
- Mat exp = imread(outImgPath, 0);
- SegmentationModel model(weights_file, config_file);
- model.setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- model.setPreferableBackend(backend);
- model.setPreferableTarget(target);
- model.segment(frame, mask);
- normAssert(mask, exp, "", norm, norm);
- }
- void testTextRecognitionModel(const std::string& weights, const std::string& cfg,
- const std::string& imgPath, const std::string& seq,
- const std::string& decodeType, const std::vector<std::string>& vocabulary,
- const Size& size = {-1, -1}, Scalar mean = Scalar(),
- double scale = 1.0, bool swapRB = false, bool crop = false)
- {
- checkBackend();
- Mat frame = imread(imgPath, IMREAD_GRAYSCALE);
- TextRecognitionModel model(weights, cfg);
- model.setDecodeType(decodeType)
- .setVocabulary(vocabulary)
- .setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- model.setPreferableBackend(backend);
- model.setPreferableTarget(target);
- std::string result = model.recognize(frame);
- EXPECT_EQ(result, seq) << "Full frame: " << imgPath;
- std::vector<Rect> rois;
- rois.push_back(Rect(0, 0, frame.cols, frame.rows));
- rois.push_back(Rect(0, 0, frame.cols, frame.rows)); // twice
- std::vector<std::string> results;
- model.recognize(frame, rois, results);
- EXPECT_EQ((size_t)2u, results.size()) << "ROI: " << imgPath;
- EXPECT_EQ(results[0], seq) << "ROI[0]: " << imgPath;
- EXPECT_EQ(results[1], seq) << "ROI[1]: " << imgPath;
- }
- void testTextDetectionModelByDB(const std::string& weights, const std::string& cfg,
- const std::string& imgPath, const std::vector<std::vector<Point>>& gt,
- float binThresh, float polyThresh,
- uint maxCandidates, double unclipRatio,
- const Size& size = {-1, -1}, Scalar mean = Scalar(), Scalar scale = Scalar::all(1.0),
- double boxes_iou_diff = 0.05, bool swapRB = false, bool crop = false)
- {
- checkBackend();
- Mat frame = imread(imgPath);
- TextDetectionModel_DB model(weights, cfg);
- model.setBinaryThreshold(binThresh)
- .setPolygonThreshold(polyThresh)
- .setUnclipRatio(unclipRatio)
- .setMaxCandidates(maxCandidates)
- .setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- model.setPreferableBackend(backend);
- model.setPreferableTarget(target);
- // 1. Check common TextDetectionModel API through RotatedRect
- std::vector<cv::RotatedRect> results;
- model.detectTextRectangles(frame, results);
- EXPECT_GT(results.size(), (size_t)0);
- std::vector< std::vector<Point> > contours;
- for (size_t i = 0; i < results.size(); i++)
- {
- const RotatedRect& box = results[i];
- Mat contour;
- boxPoints(box, contour);
- std::vector<Point> contour2i(4);
- for (int i = 0; i < 4; i++)
- {
- contour2i[i].x = cvRound(contour.at<float>(i, 0));
- contour2i[i].y = cvRound(contour.at<float>(i, 1));
- }
- contours.push_back(contour2i);
- }
- #if 0 // test debug
- Mat result = frame.clone();
- drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
- imshow("result", result); // imwrite("result.png", result);
- waitKey(0);
- #endif
- normAssertTextDetections(gt, contours, "", boxes_iou_diff);
- // 2. Check quadrangle-based API
- // std::vector< std::vector<Point> > contours;
- model.detect(frame, contours);
- #if 0 // test debug
- Mat result = frame.clone();
- drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
- imshow("result_contours", result); // imwrite("result_contours.png", result);
- waitKey(0);
- #endif
- normAssertTextDetections(gt, contours, "", boxes_iou_diff);
- }
- void testTextDetectionModelByEAST(
- const std::string& weights, const std::string& cfg,
- const std::string& imgPath, const std::vector<RotatedRect>& gt,
- float confThresh, float nmsThresh,
- const Size& size = {-1, -1}, Scalar mean = Scalar(),
- double scale = 1.0, bool swapRB = false, bool crop = false,
- double eps_center = 5/*pixels*/, double eps_size = 5/*pixels*/, double eps_angle = 1
- )
- {
- checkBackend();
- Mat frame = imread(imgPath);
- TextDetectionModel_EAST model(weights, cfg);
- model.setConfidenceThreshold(confThresh)
- .setNMSThreshold(nmsThresh)
- .setInputSize(size).setInputMean(mean).setInputScale(scale)
- .setInputSwapRB(swapRB).setInputCrop(crop);
- model.setPreferableBackend(backend);
- model.setPreferableTarget(target);
- std::vector<cv::RotatedRect> results;
- model.detectTextRectangles(frame, results);
- EXPECT_EQ(results.size(), (size_t)1);
- for (size_t i = 0; i < results.size(); i++)
- {
- const RotatedRect& box = results[i];
- #if 0 // test debug
- Mat contour;
- boxPoints(box, contour);
- std::vector<Point> contour2i(4);
- for (int i = 0; i < 4; i++)
- {
- contour2i[i].x = cvRound(contour.at<float>(i, 0));
- contour2i[i].y = cvRound(contour.at<float>(i, 1));
- }
- std::vector< std::vector<Point> > contours;
- contours.push_back(contour2i);
- Mat result = frame.clone();
- drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
- imshow("result", result); //imwrite("result.png", result);
- waitKey(0);
- #endif
- const RotatedRect& gtBox = gt[i];
- EXPECT_NEAR(box.center.x, gtBox.center.x, eps_center);
- EXPECT_NEAR(box.center.y, gtBox.center.y, eps_center);
- EXPECT_NEAR(box.size.width, gtBox.size.width, eps_size);
- EXPECT_NEAR(box.size.height, gtBox.size.height, eps_size);
- EXPECT_NEAR(box.angle, gtBox.angle, eps_angle);
- }
- }
- };
- TEST_P(Test_Model, Classify)
- {
- std::pair<int, float> ref(652, 0.641789);
- std::string img_path = _tf("grace_hopper_227.png");
- std::string config_file = _tf("bvlc_alexnet.prototxt");
- std::string weights_file = _tf("bvlc_alexnet.caffemodel", false);
- Size size{227, 227};
- float norm = 1e-4;
- testClassifyModel(weights_file, config_file, img_path, ref, norm, size);
- }
- TEST_P(Test_Model, DetectRegion)
- {
- applyTestTag(
- CV_TEST_TAG_LONG,
- CV_TEST_TAG_MEMORY_2GB
- );
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
- // accuracy
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
- // accuracy
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
- // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused
- if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
- #endif
- #if defined(INF_ENGINE_RELEASE)
- if (target == DNN_TARGET_MYRIAD
- && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
- #endif
- std::vector<int> refClassIds = {6, 1, 11};
- std::vector<float> refConfidences = {0.750469f, 0.780879f, 0.901615f};
- std::vector<Rect2d> refBoxes = {Rect2d(240, 53, 135, 72),
- Rect2d(112, 109, 192, 200),
- Rect2d(58, 141, 117, 249)};
- std::string img_path = _tf("dog416.png");
- std::string weights_file = _tf("yolo-voc.weights", false);
- std::string config_file = _tf("yolo-voc.cfg");
- double scale = 1.0 / 255.0;
- Size size{416, 416};
- bool swapRB = true;
- double confThreshold = 0.24;
- double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.397 : 0.4;
- double scoreDiff = 8e-5, iouDiff = 1e-5;
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
- {
- scoreDiff = 1e-2;
- iouDiff = 1.6e-2;
- }
- testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
- refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
- Scalar(), scale, swapRB);
- }
- TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
- {
- applyTestTag(
- CV_TEST_TAG_LONG,
- CV_TEST_TAG_MEMORY_2GB
- );
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
- // accuracy
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
- // accuracy
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
- if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
- #endif
- #if defined(INF_ENGINE_RELEASE)
- if (target == DNN_TARGET_MYRIAD
- && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
- #endif
- std::vector<int> refClassIds = { 6, 11 };
- std::vector<float> refConfidences = { 0.750469f, 0.901615f };
- std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
- Rect2d(58, 141, 117, 249) };
- std::string img_path = _tf("dog416.png");
- std::string weights_file = _tf("yolo-voc.weights", false);
- std::string config_file = _tf("yolo-voc.cfg");
- double scale = 1.0 / 255.0;
- Size size{ 416, 416 };
- bool swapRB = true;
- bool crop = false;
- bool nmsAcrossClasses = true;
- double confThreshold = 0.24;
- double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.15: 0.15;
- double scoreDiff = 8e-5, iouDiff = 1e-5;
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
- {
- scoreDiff = 1e-2;
- iouDiff = 1.6e-2;
- }
- testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
- refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
- Scalar(), scale, swapRB, crop,
- nmsAcrossClasses);
- }
- TEST_P(Test_Model, DetectionOutput)
- {
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
- // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
- // While validating node 'v1::Reshape bbox_pred_reshape (ave_bbox_pred_rois[0]:f32{1,8,1,1}, Constant_388[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
- // Requested output shape {1,300,8,1} is incompatible with input shape {1, 8, 1, 1}
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
- // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?)
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE)
- // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused
- if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
- if (target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
- #endif
- std::vector<int> refClassIds = {7, 12};
- std::vector<float> refConfidences = {0.991359f, 0.94786f};
- std::vector<Rect2d> refBoxes = {Rect2d(491, 81, 212, 98),
- Rect2d(132, 223, 207, 344)};
- std::string img_path = _tf("dog416.png");
- std::string weights_file = _tf("resnet50_rfcn_final.caffemodel", false);
- std::string config_file = _tf("rfcn_pascal_voc_resnet50.prototxt");
- Scalar mean = Scalar(102.9801, 115.9465, 122.7717);
- Size size{800, 600};
- double scoreDiff = default_l1, iouDiff = 1e-5;
- float confThreshold = 0.8;
- double nmsThreshold = 0.0;
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
- {
- if (backend == DNN_BACKEND_OPENCV)
- scoreDiff = 4e-3;
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
- else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
- scoreDiff = 4e-2;
- #endif
- else
- scoreDiff = 2e-2;
- iouDiff = 1.8e-1;
- }
- testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
- scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
- }
- TEST_P(Test_Model, DetectionMobilenetSSD)
- {
- Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));
- ref = ref.reshape(1, ref.size[2]);
- std::string img_path = _tf("street.png");
- Mat frame = imread(img_path);
- int frameWidth = frame.cols;
- int frameHeight = frame.rows;
- std::vector<int> refClassIds;
- std::vector<float> refConfidences;
- std::vector<Rect2d> refBoxes;
- for (int i = 0; i < ref.rows; i++)
- {
- refClassIds.emplace_back(ref.at<float>(i, 1));
- refConfidences.emplace_back(ref.at<float>(i, 2));
- int left = ref.at<float>(i, 3) * frameWidth;
- int top = ref.at<float>(i, 4) * frameHeight;
- int right = ref.at<float>(i, 5) * frameWidth;
- int bottom = ref.at<float>(i, 6) * frameHeight;
- int width = right - left + 1;
- int height = bottom - top + 1;
- refBoxes.emplace_back(left, top, width, height);
- }
- std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false);
- std::string config_file = _tf("MobileNetSSD_deploy.prototxt");
- Scalar mean = Scalar(127.5, 127.5, 127.5);
- double scale = 1.0 / 127.5;
- Size size{300, 300};
- double scoreDiff = 1e-5, iouDiff = 1e-5;
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16)
- {
- scoreDiff = 1.7e-2;
- iouDiff = 6.91e-2;
- }
- else if (target == DNN_TARGET_MYRIAD)
- {
- scoreDiff = 0.017;
- if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
- iouDiff = 0.1;
- }
- else if (target == DNN_TARGET_CUDA_FP16)
- {
- scoreDiff = 0.0021;
- iouDiff = 1e-2;
- }
- float confThreshold = FLT_MIN;
- double nmsThreshold = 0.0;
- testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
- scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
- }
- TEST_P(Test_Model, Keypoints_pose)
- {
- if (target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
- if (target == DNN_TARGET_CPU_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
- #ifdef HAVE_INF_ENGINE
- if (target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #endif
- Mat inp = imread(_tf("pose.png"));
- std::string weights = _tf("onnx/models/lightweight_pose_estimation_201912.onnx", false);
- float kpdata[] = {
- 237.65625f, 78.25f, 237.65625f, 136.9375f,
- 190.125f, 136.9375f, 142.59375f, 195.625f, 79.21875f, 176.0625f, 285.1875f, 117.375f,
- 348.5625f, 195.625f, 396.09375f, 176.0625f, 205.96875f, 313.0f, 205.96875f, 430.375f,
- 205.96875f, 528.1875f, 269.34375f, 293.4375f, 253.5f, 430.375f, 237.65625f, 528.1875f,
- 221.8125f, 58.6875f, 253.5f, 58.6875f, 205.96875f, 78.25f, 253.5f, 58.6875f
- };
- Mat exp(18, 2, CV_32FC1, kpdata);
- Size size{256, 256};
- float norm = 1e-4;
- double scale = 1.0/255;
- Scalar mean = Scalar(128, 128, 128);
- bool swapRB = false;
- // Ref. Range: [58.6875, 508.625]
- if (target == DNN_TARGET_CUDA_FP16)
- norm = 20; // l1 = 1.5, lInf = 20
- testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
- }
- TEST_P(Test_Model, Keypoints_face)
- {
- #if defined(INF_ENGINE_RELEASE)
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #endif
- Mat inp = imread(_tf("gray_face.png"), 0);
- std::string weights = _tf("onnx/models/facial_keypoints.onnx", false);
- Mat exp = blobFromNPY(_tf("facial_keypoints_exp.npy"));
- Size size{224, 224};
- double scale = 1.0/255;
- Scalar mean = Scalar();
- bool swapRB = false;
- // Ref. Range: [-1.1784188, 1.7758257]
- float norm = 1e-4;
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16)
- norm = 5e-3;
- if (target == DNN_TARGET_MYRIAD)
- {
- // Myriad2: l1 = 0.0004, lInf = 0.002
- // MyriadX: l1 = 0.003, lInf = 0.009
- norm = 0.009;
- }
- if (target == DNN_TARGET_CUDA_FP16)
- norm = 0.004; // l1 = 0.0006, lInf = 0.004
- testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
- }
- TEST_P(Test_Model, Detection_normalized)
- {
- std::string img_path = _tf("grace_hopper_227.png");
- std::vector<int> refClassIds = {15};
- std::vector<float> refConfidences = {0.999222f};
- std::vector<Rect2d> refBoxes = {Rect2d(0, 4, 227, 222)};
- std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false);
- std::string config_file = _tf("MobileNetSSD_deploy.prototxt");
- Scalar mean = Scalar(127.5, 127.5, 127.5);
- double scale = 1.0 / 127.5;
- Size size{300, 300};
- double scoreDiff = 1e-5, iouDiff = 1e-5;
- float confThreshold = FLT_MIN;
- double nmsThreshold = 0.0;
- if (target == DNN_TARGET_CUDA)
- {
- scoreDiff = 3e-4;
- iouDiff = 0.018;
- }
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
- {
- scoreDiff = 5e-3;
- iouDiff = 0.09;
- }
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020040000)
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
- {
- scoreDiff = 0.02;
- iouDiff = 0.1f;
- }
- #endif
- testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
- scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
- }
- TEST_P(Test_Model, Segmentation)
- {
- applyTestTag(
- CV_TEST_TAG_MEMORY_2GB
- );
- float norm = 0;
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
- // Failed to allocate graph: NC_ERROR
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- // accuracy
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
- {
- norm = 25.0f; // depends on OS/OpenCL version
- }
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
- // Failed to allocate graph: NC_ERROR
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'!
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'!
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #elif defined(INF_ENGINE_RELEASE)
- // Failed to allocate graph: NC_ERROR
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- #endif
- if ((backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16))
- || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16))
- {
- norm = 2.0f; // l1 = 0.01 lInf = 2
- }
- std::string inp = _tf("dog416.png");
- std::string weights_file = _tf("fcn8s-heavy-pascal.prototxt");
- std::string config_file = _tf("fcn8s-heavy-pascal.caffemodel", false);
- std::string exp = _tf("segmentation_exp.png");
- Size size{128, 128};
- double scale = 1.0;
- Scalar mean = Scalar();
- bool swapRB = false;
- testSegmentationModel(weights_file, config_file, inp, exp, norm, size, mean, scale, swapRB);
- }
- TEST_P(Test_Model, TextRecognition)
- {
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
- // FIXIT: dnn/src/ie_ngraph.cpp:494: error: (-215:Assertion failed) !inps.empty() in function 'createNet'
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
- // Node Transpose_79 was not assigned on any pointed device
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
- applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
- CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
- );
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
- // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
- applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
- CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
- );
- #endif
- std::string imgPath = _tf("text_rec_test.png");
- std::string weightPath = _tf("onnx/models/crnn.onnx", false);
- std::string seq = "welcome";
- Size size{100, 32};
- double scale = 1.0 / 127.5;
- Scalar mean = Scalar(127.5);
- std::string decodeType = "CTC-greedy";
- std::vector<std::string> vocabulary = {"0","1","2","3","4","5","6","7","8","9",
- "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};
- testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale);
- }
- TEST_P(Test_Model, TextRecognitionWithCTCPrefixBeamSearch)
- {
- #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
- // Node Transpose_79 was not assigned on any pointed device
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
- applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
- CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
- );
- #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
- // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
- if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
- applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
- CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
- );
- #endif
- std::string imgPath = _tf("text_rec_test.png");
- std::string weightPath = _tf("onnx/models/crnn.onnx", false);
- std::string seq = "welcome";
- Size size{100, 32};
- double scale = 1.0 / 127.5;
- Scalar mean = Scalar(127.5);
- std::string decodeType = "CTC-prefix-beam-search";
- std::vector<std::string> vocabulary = {"0","1","2","3","4","5","6","7","8","9",
- "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};
- testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale);
- }
- TEST_P(Test_Model, TextDetectionByDB)
- {
- if (target == DNN_TARGET_OPENCL_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
- if (target == DNN_TARGET_CPU_FP16)
- applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
- std::string imgPath = _tf("text_det_test1.png");
- std::string weightPathDB = _tf("onnx/models/DB_TD500_resnet50.onnx", false);
- std::string weightPathPPDB = _tf("onnx/models/PP_OCRv3_DB_text_det.onnx", false);
- // GroundTruth
- std::vector<std::vector<Point>> gt = {
- { Point(142, 193), Point(136, 164), Point(213, 150), Point(219, 178) },
- { Point(136, 165), Point(122, 114), Point(319, 71), Point(330, 122) }
- };
- Size size{736, 736};
- Scalar scaleDB = Scalar::all(1.0 / 255.0);
- Scalar meanDB = Scalar(122.67891434, 116.66876762, 104.00698793);
- // new mean and stddev
- Scalar meanPPDB = Scalar(123.675, 116.28, 103.53);
- Scalar stddevPPDB = Scalar(0.229, 0.224, 0.225);
- Scalar scalePPDB = scaleDB / stddevPPDB;
- float binThresh = 0.3;
- float polyThresh = 0.5;
- uint maxCandidates = 200;
- double unclipRatio = 2.0;
- {
- SCOPED_TRACE("Original DB");
- testTextDetectionModelByDB(weightPathDB, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, meanDB, scaleDB, 0.05f);
- }
- {
- SCOPED_TRACE("PP-OCRDBv3");
- testTextDetectionModelByDB(weightPathPPDB, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, meanPPDB, scalePPDB, 0.21f);
- }
- }
- TEST_P(Test_Model, TextDetectionByEAST)
- {
- std::string imgPath = _tf("text_det_test2.jpg");
- std::string weightPath = _tf("frozen_east_text_detection.pb", false);
- // GroundTruth
- std::vector<RotatedRect> gt = {
- RotatedRect(Point2f(657.55f, 409.5f), Size2f(316.84f, 62.45f), -4.79)
- };
- // Model parameters
- Size size{320, 320};
- double scale = 1.0;
- Scalar mean = Scalar(123.68, 116.78, 103.94);
- bool swapRB = true;
- // Detection algorithm parameters
- float confThresh = 0.5;
- float nmsThresh = 0.4;
- double eps_center = 5/*pixels*/;
- double eps_size = 5/*pixels*/;
- double eps_angle = 1;
- if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16)
- {
- eps_center = 10;
- eps_size = 25;
- eps_angle = 3;
- }
- testTextDetectionModelByEAST(weightPath, "", imgPath, gt, confThresh, nmsThresh, size, mean, scale, swapRB, false/*crop*/,
- eps_center, eps_size, eps_angle
- );
- }
- INSTANTIATE_TEST_CASE_P(/**/, Test_Model, dnnBackendsAndTargets()); // Instantiate every Test_Model TEST_P case, with an empty prefix, over the parameters returned by dnnBackendsAndTargets().
- }} // namespace
|