|
@@ -0,0 +1,597 @@
|
|
|
+#include "Steger.hpp"
|
|
|
+#include <stdlib.h>
|
|
|
+#include <time.h>
|
|
|
+#include <cuda.h>
|
|
|
+#include <cuda_runtime.h>
|
|
|
+#include "detectlines.h"
|
|
|
+#define SQRT_2_PI_INV 0.398942280401432677939946059935
|
|
|
+#define SQRTPI 1.772453850905516027
|
|
|
+#define SQRT2 1.41421356237309504880
|
|
|
+#define UPPERLIMIT 20.0
|
|
|
+
|
|
|
+#define P10 242.66795523053175
|
|
|
+#define P11 21.979261618294152
|
|
|
+#define P12 6.9963834886191355
|
|
|
+#define P13 -.035609843701815385
|
|
|
+#define Q10 215.05887586986120
|
|
|
+#define Q11 91.164905404514901
|
|
|
+#define Q12 15.082797630407787
|
|
|
+#define Q13 1.0
|
|
|
+
|
|
|
+#define P20 300.4592610201616005
|
|
|
+#define P21 451.9189537118729422
|
|
|
+#define P22 339.3208167343436870
|
|
|
+#define P23 152.9892850469404039
|
|
|
+#define P24 43.16222722205673530
|
|
|
+#define P25 7.211758250883093659
|
|
|
+#define P26 .5641955174789739711
|
|
|
+#define P27 -.0000001368648573827167067
|
|
|
+#define Q20 300.4592609569832933
|
|
|
+#define Q21 790.9509253278980272
|
|
|
+#define Q22 931.3540948506096211
|
|
|
+#define Q23 638.9802644656311665
|
|
|
+#define Q24 277.5854447439876434
|
|
|
+#define Q25 77.00015293522947295
|
|
|
+#define Q26 12.78272731962942351
|
|
|
+#define Q27 1.0
|
|
|
+
|
|
|
+#define P30 -.00299610707703542174
|
|
|
+#define P31 -.0494730910623250734
|
|
|
+#define P32 -.226956593539686930
|
|
|
+#define P33 -.278661308609647788
|
|
|
+#define P34 -.0223192459734184686
|
|
|
+#define Q30 .0106209230528467918
|
|
|
+#define Q31 .191308926107829841
|
|
|
+#define Q32 1.05167510706793207
|
|
|
+#define Q33 1.98733201817135256
|
|
|
+#define Q34 1.0
|
|
|
+
|
|
|
+
|
|
|
+void compute_eigenvals(
|
|
|
+double dfdrr,
|
|
|
+double dfdrc,
|
|
|
+double dfdcc,
|
|
|
+double eigval[2],
|
|
|
+double eigvec[2][2])
|
|
|
+{
|
|
|
+ double theta, t, c, s, e1, e2, n1, n2; /* , phi; */
|
|
|
+
|
|
|
+ /* Compute the eigenvalues and eigenvectors of the Hessian cv::Matrix. */
|
|
|
+ if (dfdrc != 0.0) {
|
|
|
+ theta = 0.5*(dfdcc - dfdrr) / dfdrc;
|
|
|
+ t = 1.0 / (fabs(theta) + sqrt(theta*theta + 1.0));
|
|
|
+ if (theta < 0.0) t = -t;
|
|
|
+ c = 1.0 / sqrt(t*t + 1.0);
|
|
|
+ s = t*c;
|
|
|
+ e1 = dfdrr - t*dfdrc;
|
|
|
+ e2 = dfdcc + t*dfdrc;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ c = 1.0;
|
|
|
+ s = 0.0;
|
|
|
+ e1 = dfdrr;
|
|
|
+ e2 = dfdcc;
|
|
|
+ }
|
|
|
+ n1 = c;
|
|
|
+ n2 = -s;
|
|
|
+
|
|
|
+ /* If the absolute value of an eigenvalue is larger than the other, put that
|
|
|
+ eigenvalue into first position. If both are of equal absolute value, put
|
|
|
+ the negative one first. */
|
|
|
+ if (fabs(e1) > fabs(e2)) {
|
|
|
+ eigval[0] = e1;
|
|
|
+ eigval[1] = e2;
|
|
|
+ eigvec[0][0] = n1;
|
|
|
+ eigvec[0][1] = n2;
|
|
|
+ eigvec[1][0] = -n2;
|
|
|
+ eigvec[1][1] = n1;
|
|
|
+ }
|
|
|
+ else if (fabs(e1) < fabs(e2)) {
|
|
|
+ eigval[0] = e2;
|
|
|
+ eigval[1] = e1;
|
|
|
+ eigvec[0][0] = -n2;
|
|
|
+ eigvec[0][1] = n1;
|
|
|
+ eigvec[1][0] = n1;
|
|
|
+ eigvec[1][1] = n2;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ if (e1 < e2) {
|
|
|
+ eigval[0] = e1;
|
|
|
+ eigval[1] = e2;
|
|
|
+ eigvec[0][0] = n1;
|
|
|
+ eigvec[0][1] = n2;
|
|
|
+ eigvec[1][0] = -n2;
|
|
|
+ eigvec[1][1] = n1;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ eigval[0] = e2;
|
|
|
+ eigval[1] = e1;
|
|
|
+ eigvec[0][0] = -n2;
|
|
|
+ eigvec[0][1] = n1;
|
|
|
+ eigvec[1][0] = n1;
|
|
|
+ eigvec[1][1] = n2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void solve_linear(
|
|
|
+double a,
|
|
|
+double b,
|
|
|
+double *t,
|
|
|
+long *num)
|
|
|
+{
|
|
|
+ if (a == 0.0) {
|
|
|
+ *num = 0;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ *num = 1;
|
|
|
+ *t = -b / a;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void compute_line_points(
|
|
|
+ float *ku[5],
|
|
|
+unsigned char *ismax,
|
|
|
+float *ev,
|
|
|
+float *nx,
|
|
|
+float *ny,
|
|
|
+float *px,
|
|
|
+float *py,
|
|
|
+long width,
|
|
|
+long height,
|
|
|
+double low,
|
|
|
+double high,
|
|
|
+long mode)
|
|
|
+{
|
|
|
+ long r, c, l;
|
|
|
+ double k[5];
|
|
|
+ double eigval[2];
|
|
|
+ double eigvec[2][2];
|
|
|
+ double a, b, t;
|
|
|
+ long num;
|
|
|
+ double n1, n2;
|
|
|
+ double p1, p2;
|
|
|
+ double val;
|
|
|
+
|
|
|
+ for (r = 0; r < height; r++)
|
|
|
+ {
|
|
|
+ for (c = 0; c < width; c++)
|
|
|
+ {
|
|
|
+ l = LINCOOR(r, c, width);
|
|
|
+ k[0] = ku[0][l];
|
|
|
+ k[1] = ku[1][l];
|
|
|
+ k[2] = ku[2][l];
|
|
|
+ k[3] = ku[3][l];
|
|
|
+ k[4] = ku[4][l];
|
|
|
+ ev[l] = 0.0;
|
|
|
+ nx[l] = 0.0;
|
|
|
+ ny[l] = 0.0;
|
|
|
+ compute_eigenvals(k[2], k[3], k[4], eigval, eigvec);
|
|
|
+ if (mode == MODE_LIGHT)
|
|
|
+ val = -eigval[0];
|
|
|
+ else
|
|
|
+ val = eigval[0];
|
|
|
+ if (val > 0.0) {
|
|
|
+ ev[l] = val;
|
|
|
+ n1 = eigvec[0][0];
|
|
|
+ n2 = eigvec[0][1];
|
|
|
+ a = k[2] * n1*n1 + 2.0*k[3] * n1*n2 + k[4] * n2*n2;
|
|
|
+ b = k[0] * n1 + k[1] * n2;
|
|
|
+ solve_linear(a, b, &t, &num);
|
|
|
+ if (num != 0) {
|
|
|
+ p1 = t*n1;
|
|
|
+ p2 = t*n2;
|
|
|
+ if (fabs(p1) <= PIXEL_BOUNDARY && fabs(p2) <= PIXEL_BOUNDARY)
|
|
|
+ {
|
|
|
+ if (val >= low)
|
|
|
+ {
|
|
|
+ if (val >= high)
|
|
|
+ ismax[l] = 2;
|
|
|
+ else
|
|
|
+ ismax[l] = 1;
|
|
|
+ }
|
|
|
+ nx[l] = n1;
|
|
|
+ ny[l] = n2;
|
|
|
+ px[l] = r + p1;
|
|
|
+ py[l] = c + p2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+Steger::Steger()
|
|
|
+{
|
|
|
+ m_sigma=3.0;
|
|
|
+ m_low=0.5;
|
|
|
+ m_high=3.5;
|
|
|
+
|
|
|
+ m_mask_0_r=0;
|
|
|
+ m_mask_0_c=0;
|
|
|
+ m_mask_1_r=0;
|
|
|
+ m_mask_1_c=0;
|
|
|
+ m_mask_2_r=0;
|
|
|
+ m_mask_2_c=0;
|
|
|
+
|
|
|
+ m_image=0;
|
|
|
+ m_mid_data=0;
|
|
|
+ m_width=0;
|
|
|
+ m_height=0;
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ m_k[i] = 0;
|
|
|
+}
|
|
|
+
|
|
|
+Steger::Steger(double sigma, double low, double high)
|
|
|
+{
|
|
|
+
|
|
|
+ m_sigma = sigma;
|
|
|
+ m_low = low;
|
|
|
+ m_high = high;
|
|
|
+ /*if (m_mask_0_r){ cudaFree(m_mask_0_r); m_mask_0_r = 0; m_l_0r = 0; }
|
|
|
+ if (m_mask_0_c){ cudaFree(m_mask_0_c); m_mask_0_c = 0; m_l_0c = 0; }
|
|
|
+ if (m_mask_1_r){ cudaFree(m_mask_1_r); m_mask_1_r = 0; m_l_1r = 0; }
|
|
|
+ if (m_mask_1_c){ cudaFree(m_mask_1_c); m_mask_1_c = 0; m_l_1c = 0; }
|
|
|
+ if (m_mask_2_r){ cudaFree(m_mask_2_r); m_mask_2_r = 0; m_l_2r = 0; }
|
|
|
+ if (m_mask_2_c){ cudaFree(m_mask_2_c); m_mask_2_c = 0; m_l_2c = 0; }*/
|
|
|
+
|
|
|
+ m_mask_0_r = compute_gauss_mask_0(&m_l_0r, sigma);
|
|
|
+ m_mask_0_c = compute_gauss_mask_0(&m_l_0c, sigma);
|
|
|
+ m_mask_1_r = compute_gauss_mask_1(&m_l_1r, sigma);
|
|
|
+ m_mask_1_c = compute_gauss_mask_1(&m_l_1c, sigma);
|
|
|
+ m_mask_2_r = compute_gauss_mask_2(&m_l_2r, sigma);
|
|
|
+ m_mask_2_c = compute_gauss_mask_2(&m_l_2c, sigma);
|
|
|
+ m_image = 0;
|
|
|
+ m_mid_data = 0;
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ m_k[i] = 0;
|
|
|
+}
|
|
|
+Steger::~Steger()
|
|
|
+{
|
|
|
+ if (m_mask_0_r){ cudaFree(m_mask_0_r); m_mask_0_r = 0; m_l_0r = 0; }
|
|
|
+ if (m_mask_0_c){ cudaFree(m_mask_0_c); m_mask_0_c = 0; m_l_0c = 0; }
|
|
|
+ if (m_mask_1_r){ cudaFree(m_mask_1_r); m_mask_1_r = 0; m_l_1r = 0; }
|
|
|
+ if (m_mask_1_c){ cudaFree(m_mask_1_c); m_mask_1_c = 0; m_l_1c = 0; }
|
|
|
+ if (m_mask_2_r){ cudaFree(m_mask_2_r); m_mask_2_r = 0; m_l_2r = 0; }
|
|
|
+ if (m_mask_2_c){ cudaFree(m_mask_2_c); m_mask_2_c = 0; m_l_2c = 0; }
|
|
|
+ if (m_image){ cudaFree(m_image); }
|
|
|
+ if (m_mid_data){ cudaFree(m_mid_data); }
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ if (m_k[i])
|
|
|
+ cudaFree(m_k[i]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+bool Steger::detect(cv::Mat src, cv::Mat& out)
|
|
|
+{
|
|
|
+ if (src.empty())return false;
|
|
|
+ if (m_width != src.cols || m_height != src.rows)
|
|
|
+ {
|
|
|
+ if (m_image){
|
|
|
+ cudaFree(m_image);
|
|
|
+ }
|
|
|
+ if (m_mid_data){
|
|
|
+ cudaFree(m_mid_data);
|
|
|
+ }
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ if (m_k[i])
|
|
|
+ cudaFree(m_k[i]);
|
|
|
+ cudaMalloc((void**)&m_k[i], src.cols*src.rows*sizeof(float));
|
|
|
+ }
|
|
|
+ cudaMalloc((void**)&m_mid_data, src.cols*src.rows*sizeof(float));
|
|
|
+ cudaMalloc((void**)&m_image, src.cols*src.rows*sizeof(float));
|
|
|
+ m_width = src.cols;
|
|
|
+ m_height = src.rows;
|
|
|
+ }
|
|
|
+
|
|
|
+ clock_t t[6];
|
|
|
+ t[0] = clock();
|
|
|
+ cudaMemcpy(m_image, src.data, m_width*m_height*sizeof(float),cudaMemcpyHostToDevice);
|
|
|
+
|
|
|
+
|
|
|
+ run_gpu();
|
|
|
+ unsigned char *ismax;
|
|
|
+ float *ev, *n1, *n2, *p1, *p2;
|
|
|
+
|
|
|
+ ismax = (unsigned char *)malloc(m_width*m_height*sizeof(unsigned char));
|
|
|
+ ev = (float*)malloc(m_width*m_height* sizeof(float));
|
|
|
+ n1 = (float*)malloc(m_width*m_height* sizeof(float));
|
|
|
+ n2 = (float*)malloc(m_width*m_height* sizeof(float));
|
|
|
+ p1 = (float*)malloc(m_width*m_height* sizeof(float));
|
|
|
+ p2 = (float*)malloc(m_width*m_height* sizeof(float));
|
|
|
+ memset(ismax, 0, m_width*m_height*sizeof(unsigned char));
|
|
|
+ memset(ev, 0, m_width*m_height*sizeof(float));
|
|
|
+ t[1] = clock();
|
|
|
+ contour **contours, *cont;
|
|
|
+ long num_cont;
|
|
|
+
|
|
|
+ float* k[5];
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ k[i] = (float*)malloc(m_width*m_height* sizeof(float));
|
|
|
+ cudaMemcpy(k[i], m_k[i], m_width*m_height*sizeof(float), cudaMemcpyDeviceToHost);
|
|
|
+ }
|
|
|
+ t[2] = clock();
|
|
|
+ compute_line_points(k, ismax, ev, n1, n2, p1, p2, m_width, m_height, m_low, m_high, MODE_LIGHT);
|
|
|
+ t[3] = clock();
|
|
|
+ compute_contours(ismax, ev, n1, n2, p1, p2, k[0], k[1], &contours, &num_cont, m_sigma,
|
|
|
+ false, MODE_LIGHT, m_low, m_high, m_width, m_height);
|
|
|
+ t[4] = clock();
|
|
|
+ int i, j, points;
|
|
|
+
|
|
|
+ for (i = 0; i < num_cont; i++){
|
|
|
+ cont = contours[i];
|
|
|
+ points = cont->num;
|
|
|
+ for (j = 0; j < points; j++){
|
|
|
+ int y = int(cont->row[j] + 0.5);
|
|
|
+ int x = (int)(cont->col[j] + 0.5);
|
|
|
+ out.at<float>(y, x) = 1.0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ t[5] = clock();
|
|
|
+ free(p2);
|
|
|
+ free(p1);
|
|
|
+ free(n2);
|
|
|
+ free(n1);
|
|
|
+ free(ev);
|
|
|
+ free(ismax);
|
|
|
+
|
|
|
+ for (i = 4; i >= 0; i--)
|
|
|
+ free(k[i]);
|
|
|
+
|
|
|
+ for (i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ printf("t%d---t%d : %f\n", i, i + 1, double(t[i + 1] - t[i]) / CLOCKS_PER_SEC);
|
|
|
+ }
|
|
|
+
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+bool Steger::detect(cv::Mat src, std::vector<cv::Point2f>& points)
|
|
|
+{
|
|
|
+ if (src.empty())return false;
|
|
|
+ if (m_width != src.cols || m_height != src.rows)
|
|
|
+ {
|
|
|
+ if (m_image) {
|
|
|
+ cudaFree(m_image);
|
|
|
+ }
|
|
|
+ if (m_mid_data) {
|
|
|
+ cudaFree(m_mid_data);
|
|
|
+ }
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ if (m_k[i])
|
|
|
+ cudaFree(m_k[i]);
|
|
|
+ cudaMalloc((void**)&m_k[i], src.cols*src.rows * sizeof(float));
|
|
|
+ }
|
|
|
+ cudaMalloc((void**)&m_mid_data, src.cols*src.rows * sizeof(float));
|
|
|
+ cudaMalloc((void**)&m_image, src.cols*src.rows * sizeof(float));
|
|
|
+ m_width = src.cols;
|
|
|
+ m_height = src.rows;
|
|
|
+ }
|
|
|
+
|
|
|
+ clock_t t[6];
|
|
|
+ t[0] = clock();
|
|
|
+ cudaMemcpy(m_image, src.data, m_width*m_height * sizeof(float), cudaMemcpyHostToDevice);
|
|
|
+
|
|
|
+
|
|
|
+ run_gpu();
|
|
|
+ unsigned char *ismax;
|
|
|
+ float *ev, *n1, *n2, *p1, *p2;
|
|
|
+
|
|
|
+ ismax = (unsigned char *)malloc(m_width*m_height * sizeof(unsigned char));
|
|
|
+ ev = (float*)malloc(m_width*m_height * sizeof(float));
|
|
|
+ n1 = (float*)malloc(m_width*m_height * sizeof(float));
|
|
|
+ n2 = (float*)malloc(m_width*m_height * sizeof(float));
|
|
|
+ p1 = (float*)malloc(m_width*m_height * sizeof(float));
|
|
|
+ p2 = (float*)malloc(m_width*m_height * sizeof(float));
|
|
|
+ memset(ismax, 0, m_width*m_height * sizeof(unsigned char));
|
|
|
+ memset(ev, 0, m_width*m_height * sizeof(float));
|
|
|
+ t[1] = clock();
|
|
|
+ contour **contours, *cont;
|
|
|
+ long num_cont;
|
|
|
+
|
|
|
+ float* k[5];
|
|
|
+ for (int i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ k[i] = (float*)malloc(m_width*m_height * sizeof(float));
|
|
|
+ cudaMemcpy(k[i], m_k[i], m_width*m_height * sizeof(float), cudaMemcpyDeviceToHost);
|
|
|
+ }
|
|
|
+ t[2] = clock();
|
|
|
+ compute_line_points(k, ismax, ev, n1, n2, p1, p2, m_width, m_height, m_low, m_high, MODE_LIGHT);
|
|
|
+ t[3] = clock();
|
|
|
+ compute_contours(ismax, ev, n1, n2, p1, p2, k[0], k[1], &contours, &num_cont, m_sigma,
|
|
|
+ false, MODE_LIGHT, m_low, m_high, m_width, m_height);
|
|
|
+ t[4] = clock();
|
|
|
+ int i, j, point_num;
|
|
|
+
|
|
|
+ points.clear();
|
|
|
+ for (i = 0; i < num_cont; i++) {
|
|
|
+ cont = contours[i];
|
|
|
+ point_num = cont->num;
|
|
|
+ for (j = 0; j < point_num; j++) {
|
|
|
+ cv::Point2f point(cont->col[j], cont->row[j]);
|
|
|
+ points.push_back(point);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ t[5] = clock();
|
|
|
+ free(p2);
|
|
|
+ free(p1);
|
|
|
+ free(n2);
|
|
|
+ free(n1);
|
|
|
+ free(ev);
|
|
|
+ free(ismax);
|
|
|
+
|
|
|
+ for (i = 4; i >= 0; i--)
|
|
|
+ free(k[i]);
|
|
|
+
|
|
|
+ for (i = 0; i < 5; i++)
|
|
|
+ {
|
|
|
+ printf("t%d---t%d : %f\n", i, i + 1, double(t[i + 1] - t[i]) / CLOCKS_PER_SEC);
|
|
|
+ }
|
|
|
+
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+float* Steger::compute_gauss_mask_0(long* num,double sigma)
|
|
|
+{
|
|
|
+ long i, n;
|
|
|
+ float limit;
|
|
|
+ float *h, *mask,*h_c;
|
|
|
+
|
|
|
+ limit = MASK_SIZE(MAX_SIZE_MASK_0, sigma); /* Error < 0.001 on each side */
|
|
|
+ n = (long)limit;
|
|
|
+ cudaMalloc((void**)&h, (2 * n + 1)*sizeof(float));
|
|
|
+ h_c =(float*)malloc((2*n+1)*sizeof(float));
|
|
|
+ mask = h_c + n;
|
|
|
+ for (i = -n + 1; i <= n - 1; i++)
|
|
|
+ mask[i] = phi0(-i + 0.5, sigma) - phi0(-i - 0.5, sigma);
|
|
|
+ mask[-n] = 1.0 - phi0(n - 0.5, sigma);
|
|
|
+ mask[n] = phi0(-n + 0.5, sigma);
|
|
|
+ cudaMemcpy(h, h_c, (2 * n + 1)*sizeof(float), cudaMemcpyHostToDevice);
|
|
|
+ free(h_c);
|
|
|
+ *num = n;
|
|
|
+
|
|
|
+ return h;
|
|
|
+}
|
|
|
+
|
|
|
+float* Steger::compute_gauss_mask_1(long *num, double sigma)
|
|
|
+{
|
|
|
+ long i, n;
|
|
|
+ float limit;
|
|
|
+ float *h, *mask,*h_c;
|
|
|
+
|
|
|
+ limit = MASK_SIZE(MAX_SIZE_MASK_1, sigma); /* Error < 0.001 on each side */
|
|
|
+ n = (long)limit;
|
|
|
+ cudaMalloc((void**)&h, (2 * n + 1)*sizeof(float));
|
|
|
+ h_c = (float*)malloc((2 * n + 1)*sizeof(float));
|
|
|
+ mask = h_c + n;
|
|
|
+ for (i = -n + 1; i <= n - 1; i++)
|
|
|
+ mask[i] = phi1(-i + 0.5, sigma) - phi1(-i - 0.5, sigma);
|
|
|
+ mask[-n] = -phi1(n - 0.5, sigma);
|
|
|
+ mask[n] = phi1(-n + 0.5, sigma);
|
|
|
+ cudaMemcpy(h, h_c, (2 * n + 1)*sizeof(float), cudaMemcpyHostToDevice);
|
|
|
+ free(h_c);
|
|
|
+ *num = n;
|
|
|
+ return h;
|
|
|
+}
|
|
|
+
|
|
|
+/* Second derivative of Gaussian smoothing mask */
|
|
|
+float* Steger::compute_gauss_mask_2(long *num, double sigma)
|
|
|
+{
|
|
|
+ long i, n;
|
|
|
+ float limit;
|
|
|
+ float *h, *mask,*h_c;
|
|
|
+
|
|
|
+ limit = MASK_SIZE(MAX_SIZE_MASK_2, sigma); /* Error < 0.001 on each side */
|
|
|
+ n = (long)limit;
|
|
|
+ cudaMalloc((void**)&h, (2 * n + 1)*sizeof(float));
|
|
|
+ h_c = (float*)malloc((2 * n + 1)*sizeof(float));
|
|
|
+ mask = h_c + n;
|
|
|
+ for (i = -n + 1; i <= n - 1; i++)
|
|
|
+ mask[i] = phi2(-i + 0.5, sigma) - phi2(-i - 0.5, sigma);
|
|
|
+ mask[-n] = -phi2(n - 0.5, sigma);
|
|
|
+ mask[n] = phi2(-n + 0.5, sigma);
|
|
|
+ cudaMemcpy(h, h_c, (2 * n + 1)*sizeof(float), cudaMemcpyHostToDevice);
|
|
|
+ *num = n;
|
|
|
+ return h;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+float Steger::phi0(float x, float sigma)
|
|
|
+{
|
|
|
+ return normal(x / sigma);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/* The Gaussian function */
|
|
|
+float Steger::phi1(float x, float sigma)
|
|
|
+{
|
|
|
+ float t;
|
|
|
+
|
|
|
+ t = x / sigma;
|
|
|
+ float ret = SQRT_2_PI_INV / sigma*exp(-0.5*t*t);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/* First derivative of the Gaussian function */
|
|
|
+float Steger::phi2(float x, float sigma)
|
|
|
+{
|
|
|
+ float t;
|
|
|
+
|
|
|
+ t = x / sigma;
|
|
|
+ float ret = -x*SQRT_2_PI_INV / pow(float(sigma), float(3.0))*exp(-0.5*t*t);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+float Steger::normal(float x)
|
|
|
+{
|
|
|
+ int sn;
|
|
|
+ float R1, R2, y, y2, y3, y4, y5, y6, y7;
|
|
|
+ float erf, erfc, z, z2, z3, z4;
|
|
|
+ float phi;
|
|
|
+
|
|
|
+ if (x < -UPPERLIMIT) return 0.0;
|
|
|
+ if (x > UPPERLIMIT) return 1.0;
|
|
|
+
|
|
|
+ y = x / SQRT2;
|
|
|
+ if (y < 0) {
|
|
|
+ y = -y;
|
|
|
+ sn = -1;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ sn = 1;
|
|
|
+
|
|
|
+ y2 = y * y;
|
|
|
+ y4 = y2 * y2;
|
|
|
+ y6 = y4 * y2;
|
|
|
+
|
|
|
+ if (y < 0.46875) {
|
|
|
+ R1 = P10 + P11 * y2 + P12 * y4 + P13 * y6;
|
|
|
+ R2 = Q10 + Q11 * y2 + Q12 * y4 + Q13 * y6;
|
|
|
+ erf = y * R1 / R2;
|
|
|
+ if (sn == 1)
|
|
|
+ phi = 0.5 + 0.5*erf;
|
|
|
+ else
|
|
|
+ phi = 0.5 - 0.5*erf;
|
|
|
+ }
|
|
|
+ else if (y < 4.0) {
|
|
|
+ y3 = y2 * y;
|
|
|
+ y5 = y4 * y;
|
|
|
+ y7 = y6 * y;
|
|
|
+ R1 = P20 + P21 * y + P22 * y2 + P23 * y3 +
|
|
|
+ P24 * y4 + P25 * y5 + P26 * y6 + P27 * y7;
|
|
|
+ R2 = Q20 + Q21 * y + Q22 * y2 + Q23 * y3 +
|
|
|
+ Q24 * y4 + Q25 * y5 + Q26 * y6 + Q27 * y7;
|
|
|
+ erfc = exp(-y2) * R1 / R2;
|
|
|
+ if (sn == 1)
|
|
|
+ phi = 1.0 - 0.5*erfc;
|
|
|
+ else
|
|
|
+ phi = 0.5*erfc;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ z = y4;
|
|
|
+ z2 = z * z;
|
|
|
+ z3 = z2 * z;
|
|
|
+ z4 = z2 * z2;
|
|
|
+ R1 = P30 + P31 * z + P32 * z2 + P33 * z3 + P34 * z4;
|
|
|
+ R2 = Q30 + Q31 * z + Q32 * z2 + Q33 * z3 + Q34 * z4;
|
|
|
+ erfc = (exp(-y2) / y) * (1.0 / SQRTPI + R1 / (R2 * y2));
|
|
|
+ if (sn == 1)
|
|
|
+ phi = 1.0 - 0.5*erfc;
|
|
|
+ else
|
|
|
+ phi = 0.5*erfc;
|
|
|
+ }
|
|
|
+
|
|
|
+ return phi;
|
|
|
+}
|