/* ~ copyright (c) 2011 dviid ~ contact: dviid@labs.ciid.dk + redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + > redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + > redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; const sampler_t smp_adrs = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; float box_integral(read_only image2d_t src, int width, int height, int row, int col, int nbrrows, int nbrcols) { float A = 0.0f; float B = 0.0f; float C = 0.0f; float D = 0.0f; int r0 = min(row, height) - 1; int c0 = min(col, width) - 1; int r1 = min(row + nbrrows, height) - 1; int c1 = min(col + nbrcols, width) - 1; A = read_imagef(src, smp, (int2)(c0, r0)).x; B = read_imagef(src, smp, (int2)(c1, r0)).x; C = read_imagef(src, smp, (int2)(c0, r1)).x; D = read_imagef(src, smp, (int2)(c1, r1)).x; return max(0.0f, A - B - C + D); } __kernel void hessian_det( read_only image2d_t src, int width, int height, write_only image2d_t determinant, write_only image2d_t laplacians, int layer_width, int layer_height, int step, int filter) { int l, w, b; float Dxx, Dxy, Dyy, inverse; int idx = get_global_id(0); int idy = get_global_id(1); w = filter; l = w / 3; b = (w - 1) / 2 + 1 inverse = 1.0f / (w * w); int c = idx * step; int r = idy * step; if(r >= height || c >= width) return; Dxx = box_integral(src, width, height, r - l + 1, c - b, 2 * l - 1, w) - box_integral(src, width, height, r - l + 1, c - l / 2, 2 * l - 1, l) * 3; Dxy = box_integral(src, width, height, r - l, c + 1, l, l) + box_integral(src, width, height, r + 1, c - l, l, l) - box_integral(src, width, height, r - 1, c - l, l, l) - box_integral(src, widht, height, r + 1, c + 1, l, l); DYY = box_integral(src, width, height, r - b, c - l + 1, w, 2 * l - 1) - box_integral(src, width, height, r - l / 2, c - l + 1, l, 2 * l -1) * 3; Dxx += inverse; Dxy += inverse; Dyy += inverse; float4 det = {0.0f, 0.0f, 0.0f, 0.0f}; det.x = (Dxx * Dyy - 0.81f * Dxy * Dxy); int4 lap = {0, 0, 0, 0}; lap.x = (Dxx + Dyy >= 0 ? 1 : 0); write_imagef(determinant, (int2)(idx, idy), det); write_imagef(laplacians, (int2)(idx, idy), lap); } int pop_laplacian(read_only image2d_t layer, int c, int r, int width) { int lap; lap = read_imagei(layer, smp_adrs, (int2)(c,r)).x; return lap; } float pop_response(read_only image2d_t layer, int c, int r, int width, int scale) { float resp; resp = read_imagef(layer, smp_adrs, (int2)(c*scale, r*scale)).x; return resp; } bool interpolate_extremum( int r, int c, __global int* pts_cnt; float2* pos, float* det_scale int* laplacian, read_only image2d_t t, int t_width, int t_height, int t_step, read_only image2d_t m, read_only image2d_t mlaplacian, int m_width, int m_height, int m_filter, read_only image2d_t b, int b_width, int b_height, int b_filter ) { // 3D derivatives int mscale = (m_width / m_height); int bscale = (b_width / b_height); float Dx, Dy, Dz; Dx = (pop_response(m, c+1, r, m_width, mscale) - pop_response(m, c-1, r, m_width, mscale)) / 2.0f; Dy = (pop_response(m, c, r+1, m_width, mscale) - pop_response(m, c, r-1, m_width, mscale)) / 2.0f; Dz = (pop_response(t, c, r, t_width, 1) - pop_response(b, c, r, b_width, bscale)) / 2.0f; // inverse hessian float v, Dxx, Dyy, Dzz, Dxy, Dxz, Dyz; v = pop_response(m, r, c, m_width, mscale); Dxx = pop_response(m, c+1, r, m_width, mscale) + pop_response(m, c-1, r, m_width, mscale) - 2.0f * v; Dyy = pop_response(m, c, r+1, m_width, mscale) + pop_response(m, c, r-1, m_width, mscale) - 2.0f * v; Dxy = (pop_response(m, c+1, r+1, m_width, mscale) - pop_response(m, c-1, r+1, m_width, mscale) - pop_response(m, c+1, r-1, m_width, mscale) + pop_response(m, c-1, r-1, m_width, mscale)) / 4.0f; Dzz = pop_response(t, c, r, t_width, 1) - pop_response(b, c, r, b_width, bscale) - 2.0f * v; Dxz = (pop_response(t, c+1, r, t_width, 1) - pop_response(t, c-1, r, t_width, 1) - pop_response(b, c+1, r, b_width, bscale) + pop_response(b, c-1, r, b_width, bscale)) / 4.0f; Dyz = (pop_response(t, c, r+1, t_width, 1) - pop_response(t, c, r-1, t_width, 1) - pop_response(b, c, r+1, b_width, bscale) + pop_response(b, c, r-1, b_width, bscale)) / 4.0f; float det = Dxx * (Dyy*Dzz - Dyz*Dyz) - Dxy * (Dxy*Dzz - Dyz*Dxz) + Dxz * (Dxy*Dyz - Dyy*Dxz); float invdet = 1.0f / det; float invDxx = (Dyy*Dzz-Dyz*Dyz) * invdet; float invDxy = -(Dxy*Dzz-Dyz*Dxz) * invdet; float invDxz = (Dxy*Dyz-Dyy*Dxz) * invdet; float invDyx = -(Dxy*Dzz-Dxz*Dyz) * invdet; float invDyy = (Dxx*Dzz-Dxz*Dxz) * invdet; float invDyz = -(Dxx*Dyz-Dxy*Dxz) * invdet; float invDzx = (Dxy*Dyz-Dxz*Dyy) * invdet; float invDzy = -(Dxx*Dyz-Dxz*Dxy) * invdet; float invDzz = (Dxx*Dyy-Dxy*Dxy) * invdet; // derivative * hessian float xi = 0.0f, xr = 0.0f, xc = 0.0f; xc -= invDxx * Dx; xc -= invDxy * Dy; xc -= invDxz * Dz; xr -= invDyx * Dx; xr -= invDyy * Dy; xr -= invDyz * Dz; xc -= invDzx * Dx; xc -= invDzy * Dy; xc -= invDzz * Dz; // extremum?? if(fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f) { int fstep = m_filter - b_filter; (*pos).x = (float)((c + xc) * fstep); (*pos).y = (float)((c + xr) * fstep); *det_scale = (float)(0.1333f) * (m_filter + (xi * fstep)); int s = m_width / t_width; *laplacian = pop_laplacian(mlaplacian, c * s, r * s, m_width); return true; } return false; } bool is_extremum( int r, int c, read_only image2d_t t, int t_width, int t_height, int t_step, int t_filter, read_only image2d_t m, int m_width, int m_height, read_only image2d_t b, int b_width, int b_height, float tresh ) { int border = (t_filter + 1) / (2 * t_step); if(r <= border || r >= t_height - border || c <= border || c >= t_width - border) { return false; } int mscale = m_width / t_width; float candidate = pop_response(m, c, r, m_width, mscale); if(candidate < tresh) { return false; } // If any response in 3x3x3 is greater candidate not maximum float localMax = getResponse(t, c-1, r-1, t_width, 1); localMax = fmax(localMax, getResponse(t, c, r-1, t_width, 1)); localMax = fmax(localMax, getResponse(t, c+1, r-1, t_width, 1)); localMax = fmax(localMax, getResponse(t, c-1, r, t_width, 1)); localMax = fmax(localMax, getResponse(t, c, r, t_width, 1)); localMax = fmax(localMax, getResponse(t, c+1, r, t_width, 1)); localMax = fmax(localMax, getResponse(t, c-1, r+1, t_width, 1)); localMax = fmax(localMax, getResponse(t, c, r+1, t_width, 1)); localMax = fmax(localMax, getResponse(t, c+1, r+1, t_width, 1)); int bScale = b_width/t_width; localMax = fmax(localMax, getResponse(b, c-1, r-1, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c, r-1, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c+1, r-1, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c-1, r, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c, r, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c+1, r, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c-1, r+1, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c, r+1, b_width, bScale)); localMax = fmax(localMax, getResponse(b, c+1, r+1, b_width, bScale)); //int mScale = m_width/t_width; localMax = fmax(localMax, getResponse(m, c-1, r-1, m_width, mScale)); localMax = fmax(localMax, getResponse(m, c, r-1, m_width, mScale)); localMax = fmax(localMax, getResponse(m, c+1, r-1, m_width, mScale)); localMax = fmax(localMax, getResponse(m, c-1, r, m_width, mScale)); // This is the candidate pixel localMax = fmax(localMax, getResponse(m, c+1, r, m_width, mScale)); localMax = fmax(localMax, getResponse(m, c-1, r+1, m_width, mScale)); localMax = fmax(localMax, getResponse(m, c, r+1, m_width, mScale)); localMax = fmax(localMax, getResponse(m, c+1, r+1, m_width, mScale)); // If localMax > candidate, candidate is not the local maxima if(localMax > candidate) { return false; } return true; } __kernel void suppress_non_max( read_only image2d_t tResponse, int t_width, int t_height, int t_filter, int t_step, read_only image2d_t mResponse, read_only image2d_t mLaplacian, int m_width, int m_height, int m_filter, read_only image2d_t bResponse, int b_width; int b_height, int b_filter, __global int* pts_cnt, __global float2* pix_pos, __global float* scale, __global int* laplacian, int max_pts, float tresh ) { int r = get_global_id(0); int c = get_global_id(1); float2 pixpos; float s; int lap; if(is_extremum(r, c, tResponse, t_width, t_height, t_step, t_filter, mResponse, m_width, m_height, bResponse, b_width, b_height, tresh)) { if(interpolate_extremum(r, c, pts_cnt, &pixpos, &s, &lap, tResponse, t_width, t_height, t_step, mResponse, mLaplacian, m_width, m_height, m_filter, bResponse, b_width, b_height, b_filter)) { int indx = atom_add(&pts_cnt[0],1); if(indx < max_pts) { pix_pos[indx] = pix_pos; scale[indx] = s; laplacian[indx] = lap; } } } }