RefractiveIndex/dviid/hessian.cl

/*
 ~ copyright (c) 2011 dviid
 ~ contact: dviid@labs.ciid.dk 
 
 + redistribution and use in source and binary forms, with or without
 + modification, are permitted provided that the following conditions
 + are met:
 +  > redistributions of source code must retain the above copyright
 +    notice, this list of conditions and the following disclaimer.
 +  > redistributions in binary form must reproduce the above copyright
 +    notice, this list of conditions and the following disclaimer in
 +    the documentation and/or other materials provided with the
 +    distribution.
 
 + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 + AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 + SUCH DAMAGE.
 
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

// Sampler used by box_integral: unnormalized (pixel) coordinates, nearest
// texel, out-of-range coordinates are clamped to the closest edge texel.
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

// Sampler used by pop_laplacian / pop_response: unnormalized (pixel)
// coordinates, nearest texel, out-of-range coordinates yield the border colour.
const sampler_t smp_adrs = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;


// box_integral: four-corner box sum over the rectangle that starts at
// (row, col) and spans nbrrows x nbrcols, evaluated as A - B - C + D on the
// .x channel of src.
//
//   src              image sampled at the four corners
//                    (NOTE(review): the corner formula presumes src holds a
//                    summed-area table -- confirm against the host code)
//   width, height    extent used to clamp the far corners
//   row, col         top-left corner of the box (may be negative)
//   nbrrows, nbrcols box extent
//
// Returns the box sum, never less than 0.
float box_integral(read_only image2d_t src, int width, int height, int row, int col, int nbrrows, int nbrcols)
{
    // corner indices; the "- 1" makes the box start exclusive at the top/left
    const int r0 = min(row, height) - 1;
    const int c0 = min(col, width) - 1;
    const int r1 = min(row + nbrrows, height) - 1;
    const int c1 = min(col + nbrcols, width) - 1;

    // A corner with a negative index lies above/left of the image and must
    // contribute zero. It cannot be sampled: smp clamps to the edge texel and
    // would return a non-zero value instead.
    const float A = (r0 >= 0 && c0 >= 0) ? read_imagef(src, smp, (int2)(c0, r0)).x : 0.0f;
    const float B = (r0 >= 0 && c1 >= 0) ? read_imagef(src, smp, (int2)(c1, r0)).x : 0.0f;
    const float C = (r1 >= 0 && c0 >= 0) ? read_imagef(src, smp, (int2)(c0, r1)).x : 0.0f;
    const float D = (r1 >= 0 && c1 >= 0) ? read_imagef(src, smp, (int2)(c1, r1)).x : 0.0f;

    return max(0.0f, A - B - C + D);
}


// hessian_det: one work-item per layer cell (idx, idy) = (global id 0, 1).
// The cell is mapped to source pixel (c, r) = (idx * step, idy * step); box
// filters of side `filter` around that pixel approximate the second
// derivatives Dxx, Dyy, Dxy.
//
//   src                   image handed to box_integral
//   width, height         extent of src; cells mapping outside are skipped
//   determinant           receives Dxx*Dyy - 0.81*Dxy^2 in .x (float image)
//   laplacians            receives 1 in .x when Dxx + Dyy >= 0, else 0
//                         (written with write_imagei, so it must be a signed
//                         integer image -- pop_laplacian reads it with
//                         read_imagei)
//   layer_width/height    not used by this kernel
//   step                  source pixels per layer cell
//   filter                box-filter side length
__kernel void hessian_det(
                          read_only image2d_t src,
                          int width,
                          int height,
                          write_only image2d_t determinant,
                          write_only image2d_t laplacians,
                          int layer_width,
                          int layer_height,
                          int step,
                          int filter)
{
    const int idx = get_global_id(0);
    const int idy = get_global_id(1);

    // layer cell -> source pixel
    const int c = idx * step;
    const int r = idy * step;

    if(r >= height || c >= width) return;

    const int w = filter;               // filter side
    const int l = w / 3;                // one third of the filter side
    const int b = (w - 1) / 2 + 1;      // half the filter side, rounded up
    const float inverse = 1.0f / (w * w);

    float Dxx = box_integral(src, width, height, r - l + 1, c - b, 2 * l - 1, w) -
                box_integral(src, width, height, r - l + 1, c - l / 2, 2 * l - 1, l) * 3;

    // four l x l lobes placed diagonally around (r, c):
    // top-right and bottom-left are added, top-left and bottom-right subtracted
    float Dxy = box_integral(src, width, height, r - l, c + 1, l, l) +
                box_integral(src, width, height, r + 1, c - l, l, l) -
                box_integral(src, width, height, r - l, c - l, l, l) -
                box_integral(src, width, height, r + 1, c + 1, l, l);

    float Dyy = box_integral(src, width, height, r - b, c - l + 1, w, 2 * l - 1) -
                box_integral(src, width, height, r - l / 2, c - l + 1, l, 2 * l - 1) * 3;

    // normalise by the filter area so responses are comparable across
    // filter sizes
    Dxx *= inverse;
    Dxy *= inverse;
    Dyy *= inverse;

    float4 det = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    det.x = (Dxx * Dyy - 0.81f * Dxy * Dxy);

    int4 lap = (int4)(0, 0, 0, 0);
    lap.x = (Dxx + Dyy >= 0 ? 1 : 0);

    write_imagef(determinant, (int2)(idx, idy), det);
    // integer payload -> integer image write
    write_imagei(laplacians, (int2)(idx, idy), lap);
}

// pop_laplacian: integer .x channel of `layer` at pixel (c, r), sampled with
// smp_adrs. The `width` argument is accepted but not used.
int pop_laplacian(read_only image2d_t layer, int c, int r, int width)
{
    const int2 at = (int2)(c, r);
    return read_imagei(layer, smp_adrs, at).x;
}

// pop_response: float .x channel of `layer` at pixel (c * scale, r * scale),
// sampled with smp_adrs. The `width` argument is accepted but not used.
float pop_response(read_only image2d_t layer, int c, int r, int width, int scale)
{
    const int2 at = (int2)(c * scale, r * scale);
    return read_imagef(layer, smp_adrs, at).x;
}

// interpolate_extremum: refines the candidate at cell (c, r) of the top
// layer's grid to sub-cell accuracy across the three layers t (top),
// m (middle) and b (bottom).
//
// Finite differences of the responses give the gradient (Dx, Dy, Dz) and the
// symmetric 3x3 Hessian; the offset is  -H^-1 * gradient, with components
// xc (column), xr (row) and xi (layer).
//
//   r, c               candidate cell, expressed in t's grid
//   pts_cnt            not used here (kept for the caller's signature)
//   pos                out: (x, y) = ((c + xc) * t_step, (r + xr) * t_step)
//   det_scale          out: 0.1333 * (m_filter + xi * (m_filter - b_filter))
//   laplacian          out: value of mlaplacian at the candidate
//   t, m, b            response images; m and b are addressed through the
//                      integer ratios m_width / t_width and b_width / t_width
//   *_width, *_height  layer extents (heights are not used here)
//   t_step             source pixels per cell of t's grid
//   m_filter, b_filter filter sides of the middle and bottom layers
//
// Returns true, with the outputs written, only when |xi|, |xr| and |xc| are
// all below 0.5; otherwise returns false and leaves the outputs untouched.
bool interpolate_extremum(
                          int r,
                          int c,
                __global int* pts_cnt,
                      float2* pos,
                       float* det_scale,
                         int* laplacian,
          read_only image2d_t t,
                          int t_width,
                          int t_height,
                          int t_step,
          read_only image2d_t m,
          read_only image2d_t mlaplacian,
                          int m_width,
                          int m_height,
                          int m_filter,
          read_only image2d_t b,
                          int b_width,
                          int b_height,
                          int b_filter
                          )
{
    // (c, r) index t's grid; m and b are reached through these ratios,
    // the same ones is_extremum uses
    const int mscale = m_width / t_width;
    const int bscale = b_width / t_width;

    // samples that are needed more than once
    const float v    = pop_response(m, c,   r,   m_width, mscale);
    const float m_xp = pop_response(m, c+1, r,   m_width, mscale);
    const float m_xm = pop_response(m, c-1, r,   m_width, mscale);
    const float m_yp = pop_response(m, c,   r+1, m_width, mscale);
    const float m_ym = pop_response(m, c,   r-1, m_width, mscale);
    const float t_v  = pop_response(t, c,   r,   t_width, 1);
    const float b_v  = pop_response(b, c,   r,   b_width, bscale);

    // 3D derivatives (central differences)
    const float Dx = (m_xp - m_xm) / 2.0f;
    const float Dy = (m_yp - m_ym) / 2.0f;
    const float Dz = (t_v  - b_v ) / 2.0f;

    // hessian (symmetric)
    const float Dxx = m_xp + m_xm - 2.0f * v;
    const float Dyy = m_yp + m_ym - 2.0f * v;
    const float Dzz = t_v  + b_v  - 2.0f * v;

    const float Dxy = (pop_response(m, c+1, r+1, m_width, mscale) -
                       pop_response(m, c-1, r+1, m_width, mscale) -
                       pop_response(m, c+1, r-1, m_width, mscale) +
                       pop_response(m, c-1, r-1, m_width, mscale)) / 4.0f;

    const float Dxz = (pop_response(t, c+1, r, t_width, 1) -
                       pop_response(t, c-1, r, t_width, 1) -
                       pop_response(b, c+1, r, b_width, bscale) +
                       pop_response(b, c-1, r, b_width, bscale)) / 4.0f;

    const float Dyz = (pop_response(t, c, r+1, t_width, 1) -
                       pop_response(t, c, r-1, t_width, 1) -
                       pop_response(b, c, r+1, b_width, bscale) +
                       pop_response(b, c, r-1, b_width, bscale)) / 4.0f;

    // cofactors; the matrix is symmetric, so six of them describe the inverse
    const float Cxx = Dyy*Dzz - Dyz*Dyz;
    const float Cxy = Dxz*Dyz - Dxy*Dzz;
    const float Cxz = Dxy*Dyz - Dyy*Dxz;
    const float Cyy = Dxx*Dzz - Dxz*Dxz;
    const float Cyz = Dxy*Dxz - Dxx*Dyz;
    const float Czz = Dxx*Dyy - Dxy*Dxy;

    const float det = Dxx*Cxx + Dxy*Cxy + Dxz*Cxz;

    // singular hessian: no usable offset
    if(det == 0.0f) {
        return false;
    }

    const float invdet = 1.0f / det;

    // offset = -(inverse hessian) * derivative
    const float xc = -(Cxx*Dx + Cxy*Dy + Cxz*Dz) * invdet;
    const float xr = -(Cxy*Dx + Cyy*Dy + Cyz*Dz) * invdet;
    const float xi = -(Cxz*Dx + Cyz*Dy + Czz*Dz) * invdet;

    // extremum only when the refined point stays within half a cell/layer
    // (written as a negated test so a NaN offset is rejected as well)
    if(!(fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f)) {
        return false;
    }

    const int fstep = m_filter - b_filter;

    // grid cell -> source pixels
    (*pos).x = (float)((c + xc) * t_step);
    (*pos).y = (float)((r + xr) * t_step);
    *det_scale = (float)(0.1333f) * (m_filter + (xi * fstep));

    *laplacian = pop_laplacian(mlaplacian, c * mscale, r * mscale, m_width);

    return true;
}

// is_extremum: tests whether the middle-layer response at cell (c, r) of the
// top layer's grid is a candidate maximum.
//
//   r, c               cell in t's grid
//   t, m, b            top / middle / bottom response images; m and b are
//                      addressed through m_width / t_width and
//                      b_width / t_width
//   t_step, t_filter   used only to size the rejected border band,
//                      (t_filter + 1) / (2 * t_step) cells wide
//   tresh              minimum response for a candidate
//
// Returns false when the cell is inside the border band, when the candidate
// is below tresh, or when any of the other 26 samples of the 3x3x3
// neighbourhood is strictly greater than the candidate; true otherwise
// (ties are accepted).
bool is_extremum(
                 int r,
                 int c,
                 read_only image2d_t t,
                 int t_width,
                 int t_height,
                 int t_step,
                 int t_filter,
                 read_only image2d_t m,
                 int m_width,
                 int m_height,
                 read_only image2d_t b,
                 int b_width,
                 int b_height,
                 float tresh
                 )
{
    const int border = (t_filter + 1) / (2 * t_step);

    if(r <= border || r >= t_height - border || c <= border || c >= t_width - border) {
        return false;
    }

    const int mscale = m_width / t_width;
    const int bscale = b_width / t_width;

    const float candidate = pop_response(m, c, r, m_width, mscale);
    if(candidate < tresh) {
        return false;
    }

    // largest response in the 3x3x3 neighbourhood, candidate itself excluded
    float localMax = pop_response(t, c, r, t_width, 1);

    for(int dr = -1; dr <= 1; ++dr) {
        for(int dc = -1; dc <= 1; ++dc) {
            localMax = fmax(localMax, pop_response(t, c + dc, r + dr, t_width, 1));
            localMax = fmax(localMax, pop_response(b, c + dc, r + dr, b_width, bscale));

            // the centre of the middle layer is the candidate pixel
            if(dr != 0 || dc != 0) {
                localMax = fmax(localMax, pop_response(m, c + dc, r + dr, m_width, mscale));
            }
        }
    }

    // a strictly greater neighbour disqualifies the candidate
    return !(localMax > candidate);
}

// atom_add on a __global int is provided by this extension
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

// suppress_non_max: one work-item per cell of the top layer's grid. A cell
// that passes is_extremum and interpolate_extremum is appended to the output
// arrays.
//
//   tResponse, mResponse, bResponse   top / middle / bottom response images
//   mLaplacian                        laplacian image of the middle layer
//   *_width, *_height, *_filter, t_step
//                                     forwarded to the two helpers
//   pts_cnt     pts_cnt[0] is atomically incremented once per accepted point.
//               It keeps counting past max_pts, so it can end up larger than
//               the number of entries actually stored.
//   pix_pos, scale, laplacian
//               output arrays, written at the slot returned by the increment
//               when that slot is below max_pts
//   max_pts     capacity of the output arrays
//   tresh       response threshold forwarded to is_extremum
//
// NOTE(review): the row comes from global id 0 and the column from global
// id 1, the opposite of hessian_det -- confirm the host enqueues this kernel
// with (rows, columns) as its global size.
__kernel void suppress_non_max(
               read_only image2d_t tResponse,
                               int t_width,
                               int t_height,
                               int t_filter,
                               int t_step,
               read_only image2d_t mResponse,
               read_only image2d_t mLaplacian,
                               int m_width,
                               int m_height,
                               int m_filter,
               read_only image2d_t bResponse,
                               int b_width,
                               int b_height,
                               int b_filter,
                      __global int* pts_cnt,
                   __global float2* pix_pos,
                    __global float* scale,
                      __global int* laplacian,
                               int max_pts,
                             float tresh
                               )
{
    const int r = get_global_id(0);
    const int c = get_global_id(1);

    if(!is_extremum(r, c, tResponse, t_width, t_height, t_step, t_filter, mResponse, m_width, m_height, bResponse, b_width, b_height, tresh)) {
        return;
    }

    // filled in by interpolate_extremum when it returns true
    float2 pixpos;
    float s;
    int lap;

    if(!interpolate_extremum(r, c, pts_cnt, &pixpos, &s, &lap, tResponse, t_width, t_height, t_step, mResponse, mLaplacian, m_width, m_height, m_filter, bResponse, b_width, b_height, b_filter)) {
        return;
    }

    // reserve a slot; the returned value is the count before the increment
    const int indx = atom_add(&pts_cnt[0], 1);
    if(indx < max_pts) {
        pix_pos[indx] = pixpos;
        scale[indx] = s;
        laplacian[indx] = lap;
    }
}
shaders all analysis all analysis are now shadered 2012-05-25 15:27:49 +02:00			`/*`
			`~ copyright (c) 2011 dviid`
			`~ contact: dviid@labs.ciid.dk`

			`+ redistribution and use in source and binary forms, with or without`
			`+ modification, are permitted provided that the following conditions`
			`+ are met:`
			`+ > redistributions of source code must retain the above copyright`
			`+ notice, this list of conditions and the following disclaimer.`
			`+ > redistributions in binary form must reproduce the above copyright`
			`+ notice, this list of conditions and the following disclaimer in`
			`+ the documentation and/or other materials provided with the`
			`+ distribution.`

			`+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS`
			`+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE`
			`+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,`
			`+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,`
			`+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS`
			`+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED`
			`+ AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,`
			`+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT`
			`+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF`
			`+ SUCH DAMAGE.`

			`~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			`*/`

			`const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE \| CLK_ADDRESS_CLAMP_TO_EDGE \| CLK_FILTER_NEAREST;`

			`const sampler_t smp_adrs = CLK_NORMALIZED_COORDS_FALSE \| CLK_ADDRESS_CLAMP \| CLK_FILTER_NEAREST;`


			`float box_integral(read_only image2d_t src, int width, int height, int row, int col, int nbrrows, int nbrcols)`
			`{`
			`float A = 0.0f;`
			`float B = 0.0f;`
			`float C = 0.0f;`
			`float D = 0.0f;`

			`int r0 = min(row, height) - 1;`
			`int c0 = min(col, width) - 1;`
			`int r1 = min(row + nbrrows, height) - 1;`
			`int c1 = min(col + nbrcols, width) - 1;`

			`A = read_imagef(src, smp, (int2)(c0, r0)).x;`
			`B = read_imagef(src, smp, (int2)(c1, r0)).x;`
			`C = read_imagef(src, smp, (int2)(c0, r1)).x;`
			`D = read_imagef(src, smp, (int2)(c1, r1)).x;`

			`return max(0.0f, A - B - C + D);`

			`}`


			`__kernel void hessian_det(`
			`read_only image2d_t src,`
			`int width,`
			`int height,`
			`write_only image2d_t determinant,`
			`write_only image2d_t laplacians,`
			`int layer_width,`
			`int layer_height,`
			`int step,`
			`int filter)`
			`{`
			`int l, w, b;`
			`float Dxx, Dxy, Dyy, inverse;`

			`int idx = get_global_id(0);`
			`int idy = get_global_id(1);`

			`w = filter;`
			`l = w / 3;`
			`b = (w - 1) / 2 + 1`
			`inverse = 1.0f / (w * w);`

			`int c = idx * step;`
			`int r = idy * step;`

			`if(r >= height \|\| c >= width) return;`

			`Dxx = box_integral(src, width, height, r - l + 1, c - b, 2 * l - 1, w) -`
			`box_integral(src, width, height, r - l + 1, c - l / 2, 2 * l - 1, l) * 3;`

			`Dxy = box_integral(src, width, height, r - l, c + 1, l, l) +`
			`box_integral(src, width, height, r + 1, c - l, l, l) -`
			`box_integral(src, width, height, r - 1, c - l, l, l) -`
			`box_integral(src, widht, height, r + 1, c + 1, l, l);`

			`DYY = box_integral(src, width, height, r - b, c - l + 1, w, 2 * l - 1) -`
			`box_integral(src, width, height, r - l / 2, c - l + 1, l, 2 * l -1) * 3;`

			`Dxx += inverse;`
			`Dxy += inverse;`
			`Dyy += inverse;`

			`float4 det = {0.0f, 0.0f, 0.0f, 0.0f};`
			`det.x = (Dxx * Dyy - 0.81f * Dxy * Dxy);`

			`int4 lap = {0, 0, 0, 0};`
			`lap.x = (Dxx + Dyy >= 0 ? 1 : 0);`

			`write_imagef(determinant, (int2)(idx, idy), det);`
			`write_imagef(laplacians, (int2)(idx, idy), lap);`

			`}`

			`int pop_laplacian(read_only image2d_t layer, int c, int r, int width)`
			`{`
			`int lap;`
			`lap = read_imagei(layer, smp_adrs, (int2)(c,r)).x;`
			`return lap;`
			`}`

			`float pop_response(read_only image2d_t layer, int c, int r, int width, int scale)`
			`{`
			`float resp;`
			`resp = read_imagef(layer, smp_adrs, (int2)(c*scale, r*scale)).x;`
			`return resp;`
			`}`

			`bool interpolate_extremum(`
			`int r,`
			`int c,`
			`__global int* pts_cnt;`
			`float2* pos,`
			`float* det_scale`
			`int* laplacian,`
			`read_only image2d_t t,`
			`int t_width,`
			`int t_height,`
			`int t_step,`
			`read_only image2d_t m,`
			`read_only image2d_t mlaplacian,`
			`int m_width,`
			`int m_height,`
			`int m_filter,`
			`read_only image2d_t b,`
			`int b_width,`
			`int b_height,`
			`int b_filter`
			`)`
			`{`

			`// 3D derivatives`

			`int mscale = (m_width / m_height);`
			`int bscale = (b_width / b_height);`

			`float Dx, Dy, Dz;`

			`Dx = (pop_response(m, c+1, r, m_width, mscale) -`
			`pop_response(m, c-1, r, m_width, mscale)) / 2.0f;`
			`Dy = (pop_response(m, c, r+1, m_width, mscale) -`
			`pop_response(m, c, r-1, m_width, mscale)) / 2.0f;`

			`Dz = (pop_response(t, c, r, t_width, 1) -`
			`pop_response(b, c, r, b_width, bscale)) / 2.0f;`

			`// inverse hessian`

			`float v, Dxx, Dyy, Dzz, Dxy, Dxz, Dyz;`

			`v = pop_response(m, r, c, m_width, mscale);`

			`Dxx = pop_response(m, c+1, r, m_width, mscale) +`
			`pop_response(m, c-1, r, m_width, mscale) - 2.0f * v;`

			`Dyy = pop_response(m, c, r+1, m_width, mscale) +`
			`pop_response(m, c, r-1, m_width, mscale) - 2.0f * v;`

			`Dxy = (pop_response(m, c+1, r+1, m_width, mscale) -`
			`pop_response(m, c-1, r+1, m_width, mscale) -`
			`pop_response(m, c+1, r-1, m_width, mscale) +`
			`pop_response(m, c-1, r-1, m_width, mscale)) / 4.0f;`

			`Dzz = pop_response(t, c, r, t_width, 1) -`
			`pop_response(b, c, r, b_width, bscale) - 2.0f * v;`

			`Dxz = (pop_response(t, c+1, r, t_width, 1) -`
			`pop_response(t, c-1, r, t_width, 1) -`
			`pop_response(b, c+1, r, b_width, bscale) +`
			`pop_response(b, c-1, r, b_width, bscale)) / 4.0f;`

			`Dyz = (pop_response(t, c, r+1, t_width, 1) -`
			`pop_response(t, c, r-1, t_width, 1) -`
			`pop_response(b, c, r+1, b_width, bscale) +`
			`pop_response(b, c, r-1, b_width, bscale)) / 4.0f;`

			`float det = Dxx * (Dyy*Dzz - Dyz*Dyz) -`
			`Dxy * (Dxy*Dzz - Dyz*Dxz) +`
			`Dxz * (Dxy*Dyz - Dyy*Dxz);`

			`float invdet = 1.0f / det;`

			`float invDxx = (Dyy*Dzz-Dyz*Dyz) * invdet;`
			`float invDxy = -(Dxy*Dzz-Dyz*Dxz) * invdet;`
			`float invDxz = (Dxy*Dyz-Dyy*Dxz) * invdet;`
			`float invDyx = -(Dxy*Dzz-Dxz*Dyz) * invdet;`
			`float invDyy = (Dxx*Dzz-Dxz*Dxz) * invdet;`
			`float invDyz = -(Dxx*Dyz-Dxy*Dxz) * invdet;`
			`float invDzx = (Dxy*Dyz-Dxz*Dyy) * invdet;`
			`float invDzy = -(Dxx*Dyz-Dxz*Dxy) * invdet;`
			`float invDzz = (Dxx*Dyy-Dxy*Dxy) * invdet;`

			`// derivative * hessian`

			`float xi = 0.0f, xr = 0.0f, xc = 0.0f;`

			`xc -= invDxx * Dx;`
			`xc -= invDxy * Dy;`
			`xc -= invDxz * Dz;`

			`xr -= invDyx * Dx;`
			`xr -= invDyy * Dy;`
			`xr -= invDyz * Dz;`

			`xc -= invDzx * Dx;`
			`xc -= invDzy * Dy;`
			`xc -= invDzz * Dz;`

			`// extremum??`
			`if(fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f) {`

			`int fstep = m_filter - b_filter;`

			`(*pos).x = (float)((c + xc) * fstep);`
			`(*pos).y = (float)((c + xr) * fstep);`
			`*det_scale = (float)(0.1333f) * (m_filter + (xi * fstep));`

			`int s = m_width / t_width;`
			`*laplacian = pop_laplacian(mlaplacian, c * s, r * s, m_width);`

			`return true;`
			`}`

			`return false;`

			`}`

			`bool is_extremum(`
			`int r,`
			`int c,`
			`read_only image2d_t t,`
			`int t_width,`
			`int t_height,`
			`int t_step,`
			`int t_filter,`
			`read_only image2d_t m,`
			`int m_width,`
			`int m_height,`
			`read_only image2d_t b,`
			`int b_width,`
			`int b_height,`
			`float tresh`
			`)`
			`{`
			`int border = (t_filter + 1) / (2 * t_step);`

			`if(r <= border \|\| r >= t_height - border \|\| c <= border \|\| c >= t_width - border) {`
			`return false;`
			`}`

			`int mscale = m_width / t_width;`

			`float candidate = pop_response(m, c, r, m_width, mscale);`
			`if(candidate < tresh) {`
			`return false;`
			`}`

			`// If any response in 3x3x3 is greater candidate not maximum`
			`float localMax = getResponse(t, c-1, r-1, t_width, 1);`
			`localMax = fmax(localMax, getResponse(t, c, r-1, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c+1, r-1, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c-1, r, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c, r, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c+1, r, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c-1, r+1, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c, r+1, t_width, 1));`
			`localMax = fmax(localMax, getResponse(t, c+1, r+1, t_width, 1));`

			`int bScale = b_width/t_width;`

			`localMax = fmax(localMax, getResponse(b, c-1, r-1, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c, r-1, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c+1, r-1, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c-1, r, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c, r, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c+1, r, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c-1, r+1, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c, r+1, b_width, bScale));`
			`localMax = fmax(localMax, getResponse(b, c+1, r+1, b_width, bScale));`

			`//int mScale = m_width/t_width;`

			`localMax = fmax(localMax, getResponse(m, c-1, r-1, m_width, mScale));`
			`localMax = fmax(localMax, getResponse(m, c, r-1, m_width, mScale));`
			`localMax = fmax(localMax, getResponse(m, c+1, r-1, m_width, mScale));`
			`localMax = fmax(localMax, getResponse(m, c-1, r, m_width, mScale));`
			`// This is the candidate pixel`
			`localMax = fmax(localMax, getResponse(m, c+1, r, m_width, mScale));`
			`localMax = fmax(localMax, getResponse(m, c-1, r+1, m_width, mScale));`
			`localMax = fmax(localMax, getResponse(m, c, r+1, m_width, mScale));`
			`localMax = fmax(localMax, getResponse(m, c+1, r+1, m_width, mScale));`

			`// If localMax > candidate, candidate is not the local maxima`
			`if(localMax > candidate) {`
			`return false;`
			`}`

			`return true;`

			`}`

			`__kernel void suppress_non_max(`
			`read_only image2d_t tResponse,`
			`int t_width,`
			`int t_height,`
			`int t_filter,`
			`int t_step,`
			`read_only image2d_t mResponse,`
			`read_only image2d_t mLaplacian,`
			`int m_width,`
			`int m_height,`
			`int m_filter,`
			`read_only image2d_t bResponse,`
			`int b_width;`
			`int b_height,`
			`int b_filter,`
			`__global int* pts_cnt,`
			`__global float2* pix_pos,`
			`__global float* scale,`
			`__global int* laplacian,`
			`int max_pts,`
			`float tresh`
			`)`
			`{`
			`int r = get_global_id(0);`
			`int c = get_global_id(1);`

			`float2 pixpos;`
			`float s;`
			`int lap;`


			`if(is_extremum(r, c, tResponse, t_width, t_height, t_step, t_filter, mResponse, m_width, m_height, bResponse, b_width, b_height, tresh)) {`

			`if(interpolate_extremum(r, c, pts_cnt, &pixpos, &s, &lap, tResponse, t_width, t_height, t_step, mResponse, mLaplacian, m_width, m_height, m_filter, bResponse, b_width, b_height, b_filter)) {`

			`int indx = atom_add(&pts_cnt[0],1);`
			`if(indx < max_pts) {`
			`pix_pos[indx] = pix_pos;`
			`scale[indx] = s;`
			`laplacian[indx] = lap;`
			`}`

			`}`

			`}`
			`}`