RefractiveIndex/dviid/hessian.cl

373 lines
13 KiB
Common Lisp
Raw Normal View History

/*
~ copyright (c) 2011 dviid
~ contact: dviid@labs.ciid.dk
+ redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ > redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ > redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
const sampler_t smp_adrs = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
float box_integral(read_only image2d_t src, int width, int height, int row, int col, int nbrrows, int nbrcols)
{
float A = 0.0f;
float B = 0.0f;
float C = 0.0f;
float D = 0.0f;
int r0 = min(row, height) - 1;
int c0 = min(col, width) - 1;
int r1 = min(row + nbrrows, height) - 1;
int c1 = min(col + nbrcols, width) - 1;
A = read_imagef(src, smp, (int2)(c0, r0)).x;
B = read_imagef(src, smp, (int2)(c1, r0)).x;
C = read_imagef(src, smp, (int2)(c0, r1)).x;
D = read_imagef(src, smp, (int2)(c1, r1)).x;
return max(0.0f, A - B - C + D);
}
__kernel void hessian_det(
read_only image2d_t src,
int width,
int height,
write_only image2d_t determinant,
write_only image2d_t laplacians,
int layer_width,
int layer_height,
int step,
int filter)
{
int l, w, b;
float Dxx, Dxy, Dyy, inverse;
int idx = get_global_id(0);
int idy = get_global_id(1);
w = filter;
l = w / 3;
b = (w - 1) / 2 + 1
inverse = 1.0f / (w * w);
int c = idx * step;
int r = idy * step;
if(r >= height || c >= width) return;
Dxx = box_integral(src, width, height, r - l + 1, c - b, 2 * l - 1, w) -
box_integral(src, width, height, r - l + 1, c - l / 2, 2 * l - 1, l) * 3;
Dxy = box_integral(src, width, height, r - l, c + 1, l, l) +
box_integral(src, width, height, r + 1, c - l, l, l) -
box_integral(src, width, height, r - 1, c - l, l, l) -
box_integral(src, widht, height, r + 1, c + 1, l, l);
DYY = box_integral(src, width, height, r - b, c - l + 1, w, 2 * l - 1) -
box_integral(src, width, height, r - l / 2, c - l + 1, l, 2 * l -1) * 3;
Dxx += inverse;
Dxy += inverse;
Dyy += inverse;
float4 det = {0.0f, 0.0f, 0.0f, 0.0f};
det.x = (Dxx * Dyy - 0.81f * Dxy * Dxy);
int4 lap = {0, 0, 0, 0};
lap.x = (Dxx + Dyy >= 0 ? 1 : 0);
write_imagef(determinant, (int2)(idx, idy), det);
write_imagef(laplacians, (int2)(idx, idy), lap);
}
int pop_laplacian(read_only image2d_t layer, int c, int r, int width)
{
int lap;
lap = read_imagei(layer, smp_adrs, (int2)(c,r)).x;
return lap;
}
float pop_response(read_only image2d_t layer, int c, int r, int width, int scale)
{
float resp;
resp = read_imagef(layer, smp_adrs, (int2)(c*scale, r*scale)).x;
return resp;
}
bool interpolate_extremum(
int r,
int c,
__global int* pts_cnt;
float2* pos,
float* det_scale
int* laplacian,
read_only image2d_t t,
int t_width,
int t_height,
int t_step,
read_only image2d_t m,
read_only image2d_t mlaplacian,
int m_width,
int m_height,
int m_filter,
read_only image2d_t b,
int b_width,
int b_height,
int b_filter
)
{
// 3D derivatives
int mscale = (m_width / m_height);
int bscale = (b_width / b_height);
float Dx, Dy, Dz;
Dx = (pop_response(m, c+1, r, m_width, mscale) -
pop_response(m, c-1, r, m_width, mscale)) / 2.0f;
Dy = (pop_response(m, c, r+1, m_width, mscale) -
pop_response(m, c, r-1, m_width, mscale)) / 2.0f;
Dz = (pop_response(t, c, r, t_width, 1) -
pop_response(b, c, r, b_width, bscale)) / 2.0f;
// inverse hessian
float v, Dxx, Dyy, Dzz, Dxy, Dxz, Dyz;
v = pop_response(m, r, c, m_width, mscale);
Dxx = pop_response(m, c+1, r, m_width, mscale) +
pop_response(m, c-1, r, m_width, mscale) - 2.0f * v;
Dyy = pop_response(m, c, r+1, m_width, mscale) +
pop_response(m, c, r-1, m_width, mscale) - 2.0f * v;
Dxy = (pop_response(m, c+1, r+1, m_width, mscale) -
pop_response(m, c-1, r+1, m_width, mscale) -
pop_response(m, c+1, r-1, m_width, mscale) +
pop_response(m, c-1, r-1, m_width, mscale)) / 4.0f;
Dzz = pop_response(t, c, r, t_width, 1) -
pop_response(b, c, r, b_width, bscale) - 2.0f * v;
Dxz = (pop_response(t, c+1, r, t_width, 1) -
pop_response(t, c-1, r, t_width, 1) -
pop_response(b, c+1, r, b_width, bscale) +
pop_response(b, c-1, r, b_width, bscale)) / 4.0f;
Dyz = (pop_response(t, c, r+1, t_width, 1) -
pop_response(t, c, r-1, t_width, 1) -
pop_response(b, c, r+1, b_width, bscale) +
pop_response(b, c, r-1, b_width, bscale)) / 4.0f;
float det = Dxx * (Dyy*Dzz - Dyz*Dyz) -
Dxy * (Dxy*Dzz - Dyz*Dxz) +
Dxz * (Dxy*Dyz - Dyy*Dxz);
float invdet = 1.0f / det;
float invDxx = (Dyy*Dzz-Dyz*Dyz) * invdet;
float invDxy = -(Dxy*Dzz-Dyz*Dxz) * invdet;
float invDxz = (Dxy*Dyz-Dyy*Dxz) * invdet;
float invDyx = -(Dxy*Dzz-Dxz*Dyz) * invdet;
float invDyy = (Dxx*Dzz-Dxz*Dxz) * invdet;
float invDyz = -(Dxx*Dyz-Dxy*Dxz) * invdet;
float invDzx = (Dxy*Dyz-Dxz*Dyy) * invdet;
float invDzy = -(Dxx*Dyz-Dxz*Dxy) * invdet;
float invDzz = (Dxx*Dyy-Dxy*Dxy) * invdet;
// derivative * hessian
float xi = 0.0f, xr = 0.0f, xc = 0.0f;
xc -= invDxx * Dx;
xc -= invDxy * Dy;
xc -= invDxz * Dz;
xr -= invDyx * Dx;
xr -= invDyy * Dy;
xr -= invDyz * Dz;
xc -= invDzx * Dx;
xc -= invDzy * Dy;
xc -= invDzz * Dz;
// extremum??
if(fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f) {
int fstep = m_filter - b_filter;
(*pos).x = (float)((c + xc) * fstep);
(*pos).y = (float)((c + xr) * fstep);
*det_scale = (float)(0.1333f) * (m_filter + (xi * fstep));
int s = m_width / t_width;
*laplacian = pop_laplacian(mlaplacian, c * s, r * s, m_width);
return true;
}
return false;
}
bool is_extremum(
int r,
int c,
read_only image2d_t t,
int t_width,
int t_height,
int t_step,
int t_filter,
read_only image2d_t m,
int m_width,
int m_height,
read_only image2d_t b,
int b_width,
int b_height,
float tresh
)
{
int border = (t_filter + 1) / (2 * t_step);
if(r <= border || r >= t_height - border || c <= border || c >= t_width - border) {
return false;
}
int mscale = m_width / t_width;
float candidate = pop_response(m, c, r, m_width, mscale);
if(candidate < tresh) {
return false;
}
// If any response in 3x3x3 is greater candidate not maximum
float localMax = getResponse(t, c-1, r-1, t_width, 1);
localMax = fmax(localMax, getResponse(t, c, r-1, t_width, 1));
localMax = fmax(localMax, getResponse(t, c+1, r-1, t_width, 1));
localMax = fmax(localMax, getResponse(t, c-1, r, t_width, 1));
localMax = fmax(localMax, getResponse(t, c, r, t_width, 1));
localMax = fmax(localMax, getResponse(t, c+1, r, t_width, 1));
localMax = fmax(localMax, getResponse(t, c-1, r+1, t_width, 1));
localMax = fmax(localMax, getResponse(t, c, r+1, t_width, 1));
localMax = fmax(localMax, getResponse(t, c+1, r+1, t_width, 1));
int bScale = b_width/t_width;
localMax = fmax(localMax, getResponse(b, c-1, r-1, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c, r-1, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c+1, r-1, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c-1, r, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c, r, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c+1, r, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c-1, r+1, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c, r+1, b_width, bScale));
localMax = fmax(localMax, getResponse(b, c+1, r+1, b_width, bScale));
//int mScale = m_width/t_width;
localMax = fmax(localMax, getResponse(m, c-1, r-1, m_width, mScale));
localMax = fmax(localMax, getResponse(m, c, r-1, m_width, mScale));
localMax = fmax(localMax, getResponse(m, c+1, r-1, m_width, mScale));
localMax = fmax(localMax, getResponse(m, c-1, r, m_width, mScale));
// This is the candidate pixel
localMax = fmax(localMax, getResponse(m, c+1, r, m_width, mScale));
localMax = fmax(localMax, getResponse(m, c-1, r+1, m_width, mScale));
localMax = fmax(localMax, getResponse(m, c, r+1, m_width, mScale));
localMax = fmax(localMax, getResponse(m, c+1, r+1, m_width, mScale));
// If localMax > candidate, candidate is not the local maxima
if(localMax > candidate) {
return false;
}
return true;
}
__kernel void suppress_non_max(
read_only image2d_t tResponse,
int t_width,
int t_height,
int t_filter,
int t_step,
read_only image2d_t mResponse,
read_only image2d_t mLaplacian,
int m_width,
int m_height,
int m_filter,
read_only image2d_t bResponse,
int b_width;
int b_height,
int b_filter,
__global int* pts_cnt,
__global float2* pix_pos,
__global float* scale,
__global int* laplacian,
int max_pts,
float tresh
)
{
int r = get_global_id(0);
int c = get_global_id(1);
float2 pixpos;
float s;
int lap;
if(is_extremum(r, c, tResponse, t_width, t_height, t_step, t_filter, mResponse, m_width, m_height, bResponse, b_width, b_height, tresh)) {
if(interpolate_extremum(r, c, pts_cnt, &pixpos, &s, &lap, tResponse, t_width, t_height, t_step, mResponse, mLaplacian, m_width, m_height, m_filter, bResponse, b_width, b_height, b_filter)) {
int indx = atom_add(&pts_cnt[0],1);
if(indx < max_pts) {
pix_pos[indx] = pix_pos;
scale[indx] = s;
laplacian[indx] = lap;
}
}
}
}