373 lines
13 KiB
Common Lisp
373 lines
13 KiB
Common Lisp
|
|
/*
|
||
|
|
~ copyright (c) 2011 dviid
|
||
|
|
~ contact: dviid@labs.ciid.dk
|
||
|
|
|
||
|
|
+ redistribution and use in source and binary forms, with or without
|
||
|
|
+ modification, are permitted provided that the following conditions
|
||
|
|
+ are met:
|
||
|
|
+ > redistributions of source code must retain the above copyright
|
||
|
|
+ notice, this list of conditions and the following disclaimer.
|
||
|
|
+ > redistributions in binary form must reproduce the above copyright
|
||
|
|
+ notice, this list of conditions and the following disclaimer in
|
||
|
|
+ the documentation and/or other materials provided with the
|
||
|
|
+ distribution.
|
||
|
|
|
||
|
|
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
|
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
|
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||
|
|
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||
|
|
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||
|
|
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||
|
|
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||
|
|
+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||
|
|
+ AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||
|
|
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||
|
|
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||
|
|
+ SUCH DAMAGE.
|
||
|
|
|
||
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
|
|
*/
|
||
|
|
|
||
|
|
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||
|
|
|
||
|
|
const sampler_t smp_adrs = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
||
|
|
|
||
|
|
|
||
|
|
float box_integral(read_only image2d_t src, int width, int height, int row, int col, int nbrrows, int nbrcols)
|
||
|
|
{
|
||
|
|
float A = 0.0f;
|
||
|
|
float B = 0.0f;
|
||
|
|
float C = 0.0f;
|
||
|
|
float D = 0.0f;
|
||
|
|
|
||
|
|
int r0 = min(row, height) - 1;
|
||
|
|
int c0 = min(col, width) - 1;
|
||
|
|
int r1 = min(row + nbrrows, height) - 1;
|
||
|
|
int c1 = min(col + nbrcols, width) - 1;
|
||
|
|
|
||
|
|
A = read_imagef(src, smp, (int2)(c0, r0)).x;
|
||
|
|
B = read_imagef(src, smp, (int2)(c1, r0)).x;
|
||
|
|
C = read_imagef(src, smp, (int2)(c0, r1)).x;
|
||
|
|
D = read_imagef(src, smp, (int2)(c1, r1)).x;
|
||
|
|
|
||
|
|
return max(0.0f, A - B - C + D);
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
__kernel void hessian_det(
|
||
|
|
read_only image2d_t src,
|
||
|
|
int width,
|
||
|
|
int height,
|
||
|
|
write_only image2d_t determinant,
|
||
|
|
write_only image2d_t laplacians,
|
||
|
|
int layer_width,
|
||
|
|
int layer_height,
|
||
|
|
int step,
|
||
|
|
int filter)
|
||
|
|
{
|
||
|
|
int l, w, b;
|
||
|
|
float Dxx, Dxy, Dyy, inverse;
|
||
|
|
|
||
|
|
int idx = get_global_id(0);
|
||
|
|
int idy = get_global_id(1);
|
||
|
|
|
||
|
|
w = filter;
|
||
|
|
l = w / 3;
|
||
|
|
b = (w - 1) / 2 + 1
|
||
|
|
inverse = 1.0f / (w * w);
|
||
|
|
|
||
|
|
int c = idx * step;
|
||
|
|
int r = idy * step;
|
||
|
|
|
||
|
|
if(r >= height || c >= width) return;
|
||
|
|
|
||
|
|
Dxx = box_integral(src, width, height, r - l + 1, c - b, 2 * l - 1, w) -
|
||
|
|
box_integral(src, width, height, r - l + 1, c - l / 2, 2 * l - 1, l) * 3;
|
||
|
|
|
||
|
|
Dxy = box_integral(src, width, height, r - l, c + 1, l, l) +
|
||
|
|
box_integral(src, width, height, r + 1, c - l, l, l) -
|
||
|
|
box_integral(src, width, height, r - 1, c - l, l, l) -
|
||
|
|
box_integral(src, widht, height, r + 1, c + 1, l, l);
|
||
|
|
|
||
|
|
DYY = box_integral(src, width, height, r - b, c - l + 1, w, 2 * l - 1) -
|
||
|
|
box_integral(src, width, height, r - l / 2, c - l + 1, l, 2 * l -1) * 3;
|
||
|
|
|
||
|
|
Dxx += inverse;
|
||
|
|
Dxy += inverse;
|
||
|
|
Dyy += inverse;
|
||
|
|
|
||
|
|
float4 det = {0.0f, 0.0f, 0.0f, 0.0f};
|
||
|
|
det.x = (Dxx * Dyy - 0.81f * Dxy * Dxy);
|
||
|
|
|
||
|
|
int4 lap = {0, 0, 0, 0};
|
||
|
|
lap.x = (Dxx + Dyy >= 0 ? 1 : 0);
|
||
|
|
|
||
|
|
write_imagef(determinant, (int2)(idx, idy), det);
|
||
|
|
write_imagef(laplacians, (int2)(idx, idy), lap);
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
int pop_laplacian(read_only image2d_t layer, int c, int r, int width)
|
||
|
|
{
|
||
|
|
int lap;
|
||
|
|
lap = read_imagei(layer, smp_adrs, (int2)(c,r)).x;
|
||
|
|
return lap;
|
||
|
|
}
|
||
|
|
|
||
|
|
float pop_response(read_only image2d_t layer, int c, int r, int width, int scale)
|
||
|
|
{
|
||
|
|
float resp;
|
||
|
|
resp = read_imagef(layer, smp_adrs, (int2)(c*scale, r*scale)).x;
|
||
|
|
return resp;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool interpolate_extremum(
|
||
|
|
int r,
|
||
|
|
int c,
|
||
|
|
__global int* pts_cnt;
|
||
|
|
float2* pos,
|
||
|
|
float* det_scale
|
||
|
|
int* laplacian,
|
||
|
|
read_only image2d_t t,
|
||
|
|
int t_width,
|
||
|
|
int t_height,
|
||
|
|
int t_step,
|
||
|
|
read_only image2d_t m,
|
||
|
|
read_only image2d_t mlaplacian,
|
||
|
|
int m_width,
|
||
|
|
int m_height,
|
||
|
|
int m_filter,
|
||
|
|
read_only image2d_t b,
|
||
|
|
int b_width,
|
||
|
|
int b_height,
|
||
|
|
int b_filter
|
||
|
|
)
|
||
|
|
{
|
||
|
|
|
||
|
|
// 3D derivatives
|
||
|
|
|
||
|
|
int mscale = (m_width / m_height);
|
||
|
|
int bscale = (b_width / b_height);
|
||
|
|
|
||
|
|
float Dx, Dy, Dz;
|
||
|
|
|
||
|
|
Dx = (pop_response(m, c+1, r, m_width, mscale) -
|
||
|
|
pop_response(m, c-1, r, m_width, mscale)) / 2.0f;
|
||
|
|
Dy = (pop_response(m, c, r+1, m_width, mscale) -
|
||
|
|
pop_response(m, c, r-1, m_width, mscale)) / 2.0f;
|
||
|
|
|
||
|
|
Dz = (pop_response(t, c, r, t_width, 1) -
|
||
|
|
pop_response(b, c, r, b_width, bscale)) / 2.0f;
|
||
|
|
|
||
|
|
// inverse hessian
|
||
|
|
|
||
|
|
float v, Dxx, Dyy, Dzz, Dxy, Dxz, Dyz;
|
||
|
|
|
||
|
|
v = pop_response(m, r, c, m_width, mscale);
|
||
|
|
|
||
|
|
Dxx = pop_response(m, c+1, r, m_width, mscale) +
|
||
|
|
pop_response(m, c-1, r, m_width, mscale) - 2.0f * v;
|
||
|
|
|
||
|
|
Dyy = pop_response(m, c, r+1, m_width, mscale) +
|
||
|
|
pop_response(m, c, r-1, m_width, mscale) - 2.0f * v;
|
||
|
|
|
||
|
|
Dxy = (pop_response(m, c+1, r+1, m_width, mscale) -
|
||
|
|
pop_response(m, c-1, r+1, m_width, mscale) -
|
||
|
|
pop_response(m, c+1, r-1, m_width, mscale) +
|
||
|
|
pop_response(m, c-1, r-1, m_width, mscale)) / 4.0f;
|
||
|
|
|
||
|
|
Dzz = pop_response(t, c, r, t_width, 1) -
|
||
|
|
pop_response(b, c, r, b_width, bscale) - 2.0f * v;
|
||
|
|
|
||
|
|
Dxz = (pop_response(t, c+1, r, t_width, 1) -
|
||
|
|
pop_response(t, c-1, r, t_width, 1) -
|
||
|
|
pop_response(b, c+1, r, b_width, bscale) +
|
||
|
|
pop_response(b, c-1, r, b_width, bscale)) / 4.0f;
|
||
|
|
|
||
|
|
Dyz = (pop_response(t, c, r+1, t_width, 1) -
|
||
|
|
pop_response(t, c, r-1, t_width, 1) -
|
||
|
|
pop_response(b, c, r+1, b_width, bscale) +
|
||
|
|
pop_response(b, c, r-1, b_width, bscale)) / 4.0f;
|
||
|
|
|
||
|
|
float det = Dxx * (Dyy*Dzz - Dyz*Dyz) -
|
||
|
|
Dxy * (Dxy*Dzz - Dyz*Dxz) +
|
||
|
|
Dxz * (Dxy*Dyz - Dyy*Dxz);
|
||
|
|
|
||
|
|
float invdet = 1.0f / det;
|
||
|
|
|
||
|
|
float invDxx = (Dyy*Dzz-Dyz*Dyz) * invdet;
|
||
|
|
float invDxy = -(Dxy*Dzz-Dyz*Dxz) * invdet;
|
||
|
|
float invDxz = (Dxy*Dyz-Dyy*Dxz) * invdet;
|
||
|
|
float invDyx = -(Dxy*Dzz-Dxz*Dyz) * invdet;
|
||
|
|
float invDyy = (Dxx*Dzz-Dxz*Dxz) * invdet;
|
||
|
|
float invDyz = -(Dxx*Dyz-Dxy*Dxz) * invdet;
|
||
|
|
float invDzx = (Dxy*Dyz-Dxz*Dyy) * invdet;
|
||
|
|
float invDzy = -(Dxx*Dyz-Dxz*Dxy) * invdet;
|
||
|
|
float invDzz = (Dxx*Dyy-Dxy*Dxy) * invdet;
|
||
|
|
|
||
|
|
// derivative * hessian
|
||
|
|
|
||
|
|
float xi = 0.0f, xr = 0.0f, xc = 0.0f;
|
||
|
|
|
||
|
|
xc -= invDxx * Dx;
|
||
|
|
xc -= invDxy * Dy;
|
||
|
|
xc -= invDxz * Dz;
|
||
|
|
|
||
|
|
xr -= invDyx * Dx;
|
||
|
|
xr -= invDyy * Dy;
|
||
|
|
xr -= invDyz * Dz;
|
||
|
|
|
||
|
|
xc -= invDzx * Dx;
|
||
|
|
xc -= invDzy * Dy;
|
||
|
|
xc -= invDzz * Dz;
|
||
|
|
|
||
|
|
// extremum??
|
||
|
|
if(fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f) {
|
||
|
|
|
||
|
|
int fstep = m_filter - b_filter;
|
||
|
|
|
||
|
|
(*pos).x = (float)((c + xc) * fstep);
|
||
|
|
(*pos).y = (float)((c + xr) * fstep);
|
||
|
|
*det_scale = (float)(0.1333f) * (m_filter + (xi * fstep));
|
||
|
|
|
||
|
|
int s = m_width / t_width;
|
||
|
|
*laplacian = pop_laplacian(mlaplacian, c * s, r * s, m_width);
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
return false;
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
bool is_extremum(
|
||
|
|
int r,
|
||
|
|
int c,
|
||
|
|
read_only image2d_t t,
|
||
|
|
int t_width,
|
||
|
|
int t_height,
|
||
|
|
int t_step,
|
||
|
|
int t_filter,
|
||
|
|
read_only image2d_t m,
|
||
|
|
int m_width,
|
||
|
|
int m_height,
|
||
|
|
read_only image2d_t b,
|
||
|
|
int b_width,
|
||
|
|
int b_height,
|
||
|
|
float tresh
|
||
|
|
)
|
||
|
|
{
|
||
|
|
int border = (t_filter + 1) / (2 * t_step);
|
||
|
|
|
||
|
|
if(r <= border || r >= t_height - border || c <= border || c >= t_width - border) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
int mscale = m_width / t_width;
|
||
|
|
|
||
|
|
float candidate = pop_response(m, c, r, m_width, mscale);
|
||
|
|
if(candidate < tresh) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
// If any response in 3x3x3 is greater candidate not maximum
|
||
|
|
float localMax = getResponse(t, c-1, r-1, t_width, 1);
|
||
|
|
localMax = fmax(localMax, getResponse(t, c, r-1, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c+1, r-1, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c-1, r, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c, r, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c+1, r, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c-1, r+1, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c, r+1, t_width, 1));
|
||
|
|
localMax = fmax(localMax, getResponse(t, c+1, r+1, t_width, 1));
|
||
|
|
|
||
|
|
int bScale = b_width/t_width;
|
||
|
|
|
||
|
|
localMax = fmax(localMax, getResponse(b, c-1, r-1, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c, r-1, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c+1, r-1, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c-1, r, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c, r, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c+1, r, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c-1, r+1, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c, r+1, b_width, bScale));
|
||
|
|
localMax = fmax(localMax, getResponse(b, c+1, r+1, b_width, bScale));
|
||
|
|
|
||
|
|
//int mScale = m_width/t_width;
|
||
|
|
|
||
|
|
localMax = fmax(localMax, getResponse(m, c-1, r-1, m_width, mScale));
|
||
|
|
localMax = fmax(localMax, getResponse(m, c, r-1, m_width, mScale));
|
||
|
|
localMax = fmax(localMax, getResponse(m, c+1, r-1, m_width, mScale));
|
||
|
|
localMax = fmax(localMax, getResponse(m, c-1, r, m_width, mScale));
|
||
|
|
// This is the candidate pixel
|
||
|
|
localMax = fmax(localMax, getResponse(m, c+1, r, m_width, mScale));
|
||
|
|
localMax = fmax(localMax, getResponse(m, c-1, r+1, m_width, mScale));
|
||
|
|
localMax = fmax(localMax, getResponse(m, c, r+1, m_width, mScale));
|
||
|
|
localMax = fmax(localMax, getResponse(m, c+1, r+1, m_width, mScale));
|
||
|
|
|
||
|
|
// If localMax > candidate, candidate is not the local maxima
|
||
|
|
if(localMax > candidate) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
return true;
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
__kernel void suppress_non_max(
|
||
|
|
read_only image2d_t tResponse,
|
||
|
|
int t_width,
|
||
|
|
int t_height,
|
||
|
|
int t_filter,
|
||
|
|
int t_step,
|
||
|
|
read_only image2d_t mResponse,
|
||
|
|
read_only image2d_t mLaplacian,
|
||
|
|
int m_width,
|
||
|
|
int m_height,
|
||
|
|
int m_filter,
|
||
|
|
read_only image2d_t bResponse,
|
||
|
|
int b_width;
|
||
|
|
int b_height,
|
||
|
|
int b_filter,
|
||
|
|
__global int* pts_cnt,
|
||
|
|
__global float2* pix_pos,
|
||
|
|
__global float* scale,
|
||
|
|
__global int* laplacian,
|
||
|
|
int max_pts,
|
||
|
|
float tresh
|
||
|
|
)
|
||
|
|
{
|
||
|
|
int r = get_global_id(0);
|
||
|
|
int c = get_global_id(1);
|
||
|
|
|
||
|
|
float2 pixpos;
|
||
|
|
float s;
|
||
|
|
int lap;
|
||
|
|
|
||
|
|
|
||
|
|
if(is_extremum(r, c, tResponse, t_width, t_height, t_step, t_filter, mResponse, m_width, m_height, bResponse, b_width, b_height, tresh)) {
|
||
|
|
|
||
|
|
if(interpolate_extremum(r, c, pts_cnt, &pixpos, &s, &lap, tResponse, t_width, t_height, t_step, mResponse, mLaplacian, m_width, m_height, m_filter, bResponse, b_width, b_height, b_filter)) {
|
||
|
|
|
||
|
|
int indx = atom_add(&pts_cnt[0],1);
|
||
|
|
if(indx < max_pts) {
|
||
|
|
pix_pos[indx] = pix_pos;
|
||
|
|
scale[indx] = s;
|
||
|
|
laplacian[indx] = lap;
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|