shaders all analysis

all analysis are now shadered
2012-05-25 15:27:49 +02:00
parent 2699edc3ac
commit 80847c5439
24 changed files with 1433 additions and 1081 deletions
@@ -0,0 +1,372 @@
+/*
+ ~ copyright (c) 2011 dviid
+ ~ contact: dviid@labs.ciid.dk 
+ 
+ + redistribution and use in source and binary forms, with or without
+ + modification, are permitted provided that the following conditions
+ + are met:
+ +  > redistributions of source code must retain the above copyright
+ +    notice, this list of conditions and the following disclaimer.
+ +  > redistributions in binary form must reproduce the above copyright
+ +    notice, this list of conditions and the following disclaimer in
+ +    the documentation and/or other materials provided with the
+ +    distribution.
+ 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ + AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ + SUCH DAMAGE.
+ 
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+// Sampler for pixel-exact reads: unnormalised (pixel) coordinates, nearest
+// filtering, out-of-range coordinates clamped to the closest edge pixel.
+const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+// Same as smp but with CLK_ADDRESS_CLAMP: out-of-range coordinates return the
+// border colour instead of the closest edge pixel.
+const sampler_t smp_adrs = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+
+
+/*
+ * Sum of the pixels inside a rectangle, computed from an integral image.
+ *
+ *  src       integral image; only the .x channel is read
+ *  width     width of src, used to clamp the right edge of the box
+ *  height    height of src, used to clamp the bottom edge of the box
+ *  row, col  top-left corner of the box (may be negative or past the edge)
+ *  nbrrows   box height in rows
+ *  nbrcols   box width in columns
+ *
+ * Returns max(0, A - B - C + D) where A..D are the integral values at the
+ * four corners of the box.
+ *
+ * A corner with a negative row or column lies outside the image and must
+ * contribute 0. It is therefore not sampled at all: smp clamps to the edge
+ * and would otherwise return the value of the nearest border pixel.
+ */
+float box_integral(read_only image2d_t src, int width, int height, int row, int col, int nbrrows, int nbrcols)
+{
+    int r0 = min(row, height) - 1;
+    int c0 = min(col, width) - 1;
+    int r1 = min(row + nbrrows, height) - 1;
+    int c1 = min(col + nbrcols, width) - 1;
+
+    float A = (r0 >= 0 && c0 >= 0) ? read_imagef(src, smp, (int2)(c0, r0)).x : 0.0f;
+    float B = (r0 >= 0 && c1 >= 0) ? read_imagef(src, smp, (int2)(c1, r0)).x : 0.0f;
+    float C = (r1 >= 0 && c0 >= 0) ? read_imagef(src, smp, (int2)(c0, r1)).x : 0.0f;
+    float D = (r1 >= 0 && c1 >= 0) ? read_imagef(src, smp, (int2)(c1, r1)).x : 0.0f;
+
+    return max(0.0f, A - B - C + D);
+}
+
+
+/*
+ * Fills one response layer: for every layer cell (idx, idy) it evaluates
+ * box-filter approximations of the second derivatives Dxx, Dyy, Dxy on the
+ * integral image and stores
+ *   determinant(idx, idy).x = Dxx * Dyy - 0.81 * Dxy^2
+ *   laplacians(idx, idy).x  = 1 when Dxx + Dyy >= 0, otherwise 0
+ *
+ *  src            integral image (read through box_integral)
+ *  width, height  size of src
+ *  determinant    float output image
+ *  laplacians     integer output image; it is written with write_imagei
+ *                 because pop_laplacian reads it back with read_imagei
+ *                 (NOTE(review): the host must allocate it with a signed
+ *                 integer channel type - confirm)
+ *  layer_width,
+ *  layer_height   not used by the kernel body
+ *  step           distance in src pixels between two neighbouring cells
+ *  filter         side length of the box filter in pixels
+ */
+__kernel void hessian_det(
+                          read_only image2d_t src,
+                          int width,
+                          int height,
+                          write_only image2d_t determinant,
+                          write_only image2d_t laplacians,
+                          int layer_width,
+                          int layer_height,
+                          int step,
+                          int filter)
+{
+    int idx = get_global_id(0);
+    int idy = get_global_id(1);
+
+    int w = filter;                  // filter size
+    int l = w / 3;                   // lobe size
+    int b = (w - 1) / 2 + 1;         // filter border
+    float inverse = 1.0f / (w * w);  // normalisation by the filter area
+
+    // position of this cell in the source image
+    int c = idx * step;
+    int r = idy * step;
+
+    if(r >= height || c >= width) return;
+
+    float Dxx = box_integral(src, width, height, r - l + 1, c - b, 2 * l - 1, w) -
+                box_integral(src, width, height, r - l + 1, c - l / 2, 2 * l - 1, l) * 3;
+
+    // four l x l lobes placed diagonally around (r, c)
+    float Dxy = box_integral(src, width, height, r - l, c + 1, l, l) +
+                box_integral(src, width, height, r + 1, c - l, l, l) -
+                box_integral(src, width, height, r - l, c - l, l, l) -
+                box_integral(src, width, height, r + 1, c + 1, l, l);
+
+    float Dyy = box_integral(src, width, height, r - b, c - l + 1, w, 2 * l - 1) -
+                box_integral(src, width, height, r - l / 2, c - l + 1, l, 2 * l - 1) * 3;
+
+    // normalise the responses with respect to the filter size
+    Dxx *= inverse;
+    Dxy *= inverse;
+    Dyy *= inverse;
+
+    float4 det = (float4)(Dxx * Dyy - 0.81f * Dxy * Dxy, 0.0f, 0.0f, 0.0f);
+    int4 lap = (int4)((Dxx + Dyy >= 0 ? 1 : 0), 0, 0, 0);
+
+    write_imagef(determinant, (int2)(idx, idy), det);
+    write_imagei(laplacians, (int2)(idx, idy), lap);
+}
+
+/*
+ * Fetches the integer value stored in the .x channel of `layer` at column c,
+ * row r, using the smp_adrs sampler. `width` is accepted but not used.
+ */
+int pop_laplacian(read_only image2d_t layer, int c, int r, int width)
+{
+    const int2 at = (int2)(c, r);
+    return read_imagei(layer, smp_adrs, at).x;
+}
+
+/*
+ * Fetches the float value stored in the .x channel of `layer` at column
+ * c * scale, row r * scale, using the smp_adrs sampler. `width` is accepted
+ * but not used.
+ */
+float pop_response(read_only image2d_t layer, int c, int r, int width, int scale)
+{
+    const int2 at = (int2)(c * scale, r * scale);
+    return read_imagef(layer, smp_adrs, at).x;
+}
+
+/*
+ * Refines a candidate extremum at cell (r, c) of the top layer `t` to
+ * sub-cell accuracy, using the three layers t (top), m (middle), b (bottom).
+ *
+ * The offset (xc, xr, xi) is -H^-1 * D, where D is the gradient and H the
+ * symmetric 3x3 Hessian of the response, both taken by finite differences
+ * across column, row and layer.
+ *
+ *  r, c          candidate row / column in the coordinates of layer t
+ *  pts_cnt       not used here
+ *  pos           out: interpolated position, (c + xc, r + xr) * t_step
+ *  det_scale     out: 0.1333 * (m_filter + xi * (m_filter - b_filter))
+ *  laplacian     out: laplacian flag of the middle layer at the candidate
+ *  t, m, b       response layers; m and b are addressed through the ratio
+ *                of their width to t_width
+ *  mlaplacian    laplacian image belonging to layer m
+ *  *_height      not used here
+ *
+ * Returns true and fills the outputs when all three offsets are smaller than
+ * 0.5 in magnitude; returns false (outputs untouched) otherwise, including
+ * when the Hessian is singular.
+ */
+bool interpolate_extremum(
+                          int r,
+                          int c,
+                __global int* pts_cnt,
+                      float2* pos,
+                       float* det_scale,
+                         int* laplacian,
+          read_only image2d_t t,
+                          int t_width,
+                          int t_height,
+                          int t_step,
+          read_only image2d_t m,
+          read_only image2d_t mlaplacian,
+                          int m_width,
+                          int m_height,
+                          int m_filter,
+          read_only image2d_t b,
+                          int b_width,
+                          int b_height,
+                          int b_filter
+                          )
+{
+    // how many cells of m / b correspond to one cell of t
+    int mscale = m_width / t_width;
+    int bscale = b_width / t_width;
+
+    // samples shared by the first and second derivatives
+    float v   = pop_response(m, c,   r,   m_width, mscale);
+    float mxp = pop_response(m, c+1, r,   m_width, mscale);
+    float mxm = pop_response(m, c-1, r,   m_width, mscale);
+    float myp = pop_response(m, c,   r+1, m_width, mscale);
+    float mym = pop_response(m, c,   r-1, m_width, mscale);
+    float tv  = pop_response(t, c,   r,   t_width, 1);
+    float bv  = pop_response(b, c,   r,   b_width, bscale);
+
+    // 3D derivatives
+    float Dx = (mxp - mxm) / 2.0f;
+    float Dy = (myp - mym) / 2.0f;
+    float Dz = (tv - bv) / 2.0f;
+
+    // hessian
+    float Dxx = mxp + mxm - 2.0f * v;
+    float Dyy = myp + mym - 2.0f * v;
+    float Dzz = tv + bv - 2.0f * v;
+
+    float Dxy = (pop_response(m, c+1, r+1, m_width, mscale) -
+                 pop_response(m, c-1, r+1, m_width, mscale) -
+                 pop_response(m, c+1, r-1, m_width, mscale) +
+                 pop_response(m, c-1, r-1, m_width, mscale)) / 4.0f;
+
+    float Dxz = (pop_response(t, c+1, r, t_width, 1) -
+                 pop_response(t, c-1, r, t_width, 1) -
+                 pop_response(b, c+1, r, b_width, bscale) +
+                 pop_response(b, c-1, r, b_width, bscale)) / 4.0f;
+
+    float Dyz = (pop_response(t, c, r+1, t_width, 1) -
+                 pop_response(t, c, r-1, t_width, 1) -
+                 pop_response(b, c, r+1, b_width, bscale) +
+                 pop_response(b, c, r-1, b_width, bscale)) / 4.0f;
+
+    float det = Dxx * (Dyy*Dzz - Dyz*Dyz) -
+                Dxy * (Dxy*Dzz - Dyz*Dxz) +
+                Dxz * (Dxy*Dyz - Dyy*Dxz);
+
+    // a singular hessian has no inverse, so there is nothing to interpolate
+    if(det == 0.0f) {
+        return false;
+    }
+
+    float invdet = 1.0f / det;
+
+    // inverse hessian; it is symmetric, so six entries describe it fully
+    float invXX =  (Dyy*Dzz - Dyz*Dyz) * invdet;
+    float invXY = -(Dxy*Dzz - Dyz*Dxz) * invdet;
+    float invXZ =  (Dxy*Dyz - Dyy*Dxz) * invdet;
+    float invYY =  (Dxx*Dzz - Dxz*Dxz) * invdet;
+    float invYZ = -(Dxx*Dyz - Dxy*Dxz) * invdet;
+    float invZZ =  (Dxx*Dyy - Dxy*Dxy) * invdet;
+
+    // offset = -(inverse hessian) * derivative
+    float xc = -(invXX * Dx + invXY * Dy + invXZ * Dz);
+    float xr = -(invXY * Dx + invYY * Dy + invYZ * Dz);
+    float xi = -(invXZ * Dx + invYZ * Dy + invZZ * Dz);
+
+    // accept only when the extremum stays within half a cell of the candidate
+    if(fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f) {
+
+        int fstep = m_filter - b_filter;
+
+        // layer cells are t_step source pixels apart
+        (*pos).x = (float)((c + xc) * t_step);
+        (*pos).y = (float)((r + xr) * t_step);
+        *det_scale = (float)(0.1333f) * (m_filter + (xi * fstep));
+
+        *laplacian = pop_laplacian(mlaplacian, c * mscale, r * mscale, m_width);
+
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Tells whether the middle-layer response at cell (r, c) is a local maximum
+ * of its 3x3x3 neighbourhood across the layers t (top), m (middle) and
+ * b (bottom).
+ *
+ *  r, c      candidate row / column in the coordinates of layer t
+ *  t_step,
+ *  t_filter  used only to derive the border that is excluded from the test
+ *  m, b      addressed through the ratio of their width to t_width
+ *  *_height  of m and b: not used here
+ *  tresh     minimum response a candidate must reach
+ *
+ * Returns false when the cell lies on or inside the border, when the
+ * candidate is below `tresh`, or when any neighbour is strictly greater than
+ * the candidate; true otherwise.
+ */
+bool is_extremum(
+                 int r,
+                 int c,
+                 read_only image2d_t t,
+                 int t_width,
+                 int t_height,
+                 int t_step,
+                 int t_filter,
+                 read_only image2d_t m,
+                 int m_width,
+                 int m_height,
+                 read_only image2d_t b,
+                 int b_width,
+                 int b_height,
+                 float tresh
+                 )
+{
+    int border = (t_filter + 1) / (2 * t_step);
+
+    if(r <= border || r >= t_height - border || c <= border || c >= t_width - border) {
+        return false;
+    }
+
+    int mscale = m_width / t_width;
+    int bscale = b_width / t_width;
+
+    float candidate = pop_response(m, c, r, m_width, mscale);
+    if(candidate < tresh) {
+        return false;
+    }
+
+    // largest response among the 26 neighbours of the candidate
+    float localMax = pop_response(t, c, r, t_width, 1);
+
+    for(int dr = -1; dr <= 1; dr++) {
+        for(int dc = -1; dc <= 1; dc++) {
+            localMax = fmax(localMax, pop_response(t, c + dc, r + dr, t_width, 1));
+            localMax = fmax(localMax, pop_response(b, c + dc, r + dr, b_width, bscale));
+
+            // the centre of the middle layer is the candidate itself
+            if(dr != 0 || dc != 0) {
+                localMax = fmax(localMax, pop_response(m, c + dc, r + dr, m_width, mscale));
+            }
+        }
+    }
+
+    // a strictly greater neighbour disqualifies the candidate
+    return !(localMax > candidate);
+}
+
+/*
+ * One work-item per cell of the top layer: keeps the cell only when it passes
+ * is_extremum and interpolate_extremum, then appends the interpolated point
+ * to the output arrays.
+ *
+ *  tResponse, mResponse, bResponse  top / middle / bottom response layers
+ *  mLaplacian                       laplacian image of the middle layer
+ *  *_width, *_height, *_filter,
+ *  t_step                           forwarded unchanged to the two helpers
+ *  pts_cnt     pts_cnt[0] is incremented atomically for every accepted point;
+ *              it keeps growing past max_pts, so it can exceed the number of
+ *              points actually stored
+ *  pix_pos, scale, laplacian        output arrays; written only at indices
+ *                                   below max_pts
+ *  max_pts     capacity of the output arrays
+ *  tresh       response threshold forwarded to is_extremum
+ *
+ * NOTE(review): atom_add belongs to the cl_khr_global_int32_base_atomics
+ * extension; confirm that the host build enables it.
+ */
+__kernel void suppress_non_max(
+               read_only image2d_t tResponse,
+                               int t_width,
+                               int t_height,
+                               int t_filter,
+                               int t_step,
+               read_only image2d_t mResponse,
+               read_only image2d_t mLaplacian,
+                               int m_width,
+                               int m_height,
+                               int m_filter,
+               read_only image2d_t bResponse,
+                               int b_width,
+                               int b_height,
+                               int b_filter,
+                      __global int* pts_cnt,
+                   __global float2* pix_pos,
+                    __global float* scale,
+                      __global int* laplacian,
+                               int max_pts,
+                             float tresh
+                               )
+{
+    // dimension 0 is taken as the row, dimension 1 as the column
+    int r = get_global_id(0);
+    int c = get_global_id(1);
+
+    float2 pixpos;
+    float s;
+    int lap;
+
+    if(!is_extremum(r, c, tResponse, t_width, t_height, t_step, t_filter, mResponse, m_width, m_height, bResponse, b_width, b_height, tresh)) {
+        return;
+    }
+
+    if(!interpolate_extremum(r, c, pts_cnt, &pixpos, &s, &lap, tResponse, t_width, t_height, t_step, mResponse, mLaplacian, m_width, m_height, m_filter, bResponse, b_width, b_height, b_filter)) {
+        return;
+    }
+
+    // reserve a slot; points beyond the capacity are counted but dropped
+    int indx = atom_add(&pts_cnt[0], 1);
+    if(indx < max_pts) {
+        pix_pos[indx] = pixpos;
+        scale[indx] = s;
+        laplacian[indx] = lap;
+    }
+}
+                               
+                               
+                          
+                          
+                          
+
+
+
+
@@ -0,0 +1,148 @@
+/*
+ ~ copyright (c) 2011 dviid
+ ~ contact: dviid@labs.ciid.dk 
+ 
+ + redistribution and use in source and binary forms, with or without
+ + modification, are permitted provided that the following conditions
+ + are met:
+ +  > redistributions of source code must retain the above copyright
+ +    notice, this list of conditions and the following disclaimer.
+ +  > redistributions in binary form must reproduce the above copyright
+ +    notice, this list of conditions and the following disclaimer in
+ +    the documentation and/or other materials provided with the
+ +    distribution.
+ 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ + AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ + SUCH DAMAGE.
+ 
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+// Sampler shared by every kernel below: nearest filtering, out-of-range
+// coordinates clamped to the closest edge pixel.
+const sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+/*
+ * Sobel edge magnitude. For every pixel of dst the 3x3 horizontal (Gx) and
+ * vertical (Gy) gradients of channel 0 of src are accumulated and
+ * sqrt(Gx^2 + Gy^2) is written to all four channels of dst.
+ * Neighbours outside src are read through smp, i.e. clamped to the edge.
+ * Work-items outside dst do nothing.
+ */
+__kernel void sobel(read_only image2d_t src, write_only image2d_t dst)
+{
+    int2 coords = (int2) (get_global_id(0), get_global_id(1));
+    int2 dx = (int2)(1,0);
+    int2 dy = (int2)(0,1);
+    float Gx, Gy, G, p;
+
+    // valid coordinates run from 0 to size - 1
+    if(coords.x >= get_image_width(dst) || coords.y >= get_image_height(dst))
+        return;
+
+    // row y + 1
+    p = read_imagef(src, smp, coords - dx + dy).s0;
+    Gx = p;
+    Gy = p;
+
+    p = read_imagef(src, smp, coords + dy).s0;
+    Gy += 2 * p;
+
+    p = read_imagef(src, smp, coords + dx + dy).s0;
+    Gx -= p;
+    Gy += p;
+
+    // row y
+    p = read_imagef(src, smp, coords - dx).s0;
+    Gx += 2 * p;
+
+    p = read_imagef(src, smp, coords + dx).s0;
+    Gx -= 2 * p;
+
+    // row y - 1
+    p = read_imagef(src, smp, coords - dx - dy).s0;
+    Gx += p;
+    Gy -= p;
+
+    p = read_imagef(src, smp, coords - dy).s0;
+    Gy -= 2 * p;
+
+    p = read_imagef(src, smp, coords + dx - dy).s0;
+    Gx -= p;
+    Gy -= p;
+
+    G = sqrt(Gx * Gx + Gy * Gy);
+
+    write_imagef(dst, coords, (float4)(G));
+}
+
+/*
+ * Horizontal pass of a separable weighted blur.
+ *
+ *  src, dst     input / output images
+ *  weights      nbr_weights tap weights
+ *  offsets      nbr_weights horizontal tap offsets, added to the x coordinate
+ *  nbr_weights  number of taps
+ *
+ * Each output pixel is the weighted sum of the taps, clamped to [0, 1].
+ * Both tables are only read, hence `const`; `read_only` is an image access
+ * qualifier and does not apply to buffer pointers.
+ */
+__kernel void hgauss(read_only image2d_t src, write_only image2d_t dst,
+                     __global const float* weights, __global const float* offsets, const int nbr_weights)
+{
+    int2    coords = (int2) (get_global_id(0), get_global_id(1));
+    float2  src_coords = (float2) (get_global_id(0), get_global_id(1));
+    float4  pix = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+
+    for(int i = 0; i < nbr_weights; i++) {
+        pix += read_imagef(src, smp, src_coords + (float2) (offsets[i], 0.0f)) * weights[i];
+    }
+
+    write_imagef(dst, coords, clamp(pix, 0.0f, 1.0f));
+}
+
+/*
+ * Vertical pass of a separable weighted blur.
+ *
+ *  src, dst     input / output images
+ *  weights      nbr_weights tap weights
+ *  offsets      nbr_weights vertical tap offsets, added to the y coordinate
+ *  nbr_weights  number of taps
+ *
+ * Each output pixel is the weighted sum of the taps, clamped to [0, 1].
+ * Both tables are only read, hence `const`; `read_only` is an image access
+ * qualifier and does not apply to buffer pointers.
+ */
+__kernel void vgauss(read_only image2d_t src, write_only image2d_t dst,
+                     __global const float* weights, __global const float* offsets, const int nbr_weights)
+{
+    int2    coords = (int2) (get_global_id(0), get_global_id(1));
+    float2  src_coords = (float2) (get_global_id(0), get_global_id(1));
+    float4  pix = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+
+    for(int i = 0; i < nbr_weights; i++) {
+        pix += read_imagef(src, smp, src_coords + (float2) (0.0f, offsets[i])) * weights[i];
+    }
+
+    write_imagef(dst, coords, clamp(pix, 0.0f, 1.0f));
+}
+
+
+
+/*
+ * Greyscale conversion: writes 0.3 R + 0.59 G + 0.11 B to the three colour
+ * channels of dst and 1.0 to the fourth.
+ * The weights are float literals so the sum is evaluated in single precision
+ * and the kernel does not depend on double support.
+ */
+__kernel void grey(read_only image2d_t src, write_only image2d_t dst)
+{
+    int2 coords = (int2) (get_global_id(0), get_global_id(1));
+    float4 color = read_imagef(src, smp, coords);
+    float luminance = 0.3f * color.x + 0.59f * color.y + 0.11f * color.z;
+    write_imagef(dst, coords, (float4)(luminance, luminance, luminance, 1.0f));
+}
+
+/*
+ * Writes, for each pixel, the largest of the first three channels of src to
+ * every channel of dst.
+ */
+__kernel void brightness(read_only image2d_t src, write_only image2d_t dst)
+{
+    const int2 at = (int2) (get_global_id(0), get_global_id(1));
+    const float4 texel = read_imagef(src, smp, at);
+
+    float peak = texel.x;
+    if(texel.y > peak) {
+        peak = texel.y;
+    }
+    if(texel.z > peak) {
+        peak = texel.z;
+    }
+
+    // replicate the scalar into all four channels
+    write_imagef(dst, at, (float4)(peak));
+}
+
+/*
+ * Writes, for each pixel, the mean of the first three channels of src to
+ * every channel of dst.
+ */
+__kernel void lightness(read_only image2d_t src, write_only image2d_t dst)
+{
+    const int2 at = (int2) (get_global_id(0), get_global_id(1));
+    const float4 texel = read_imagef(src, smp, at);
+
+    const float mean = (texel.x + texel.y + texel.z) / 3.0f;
+
+    // replicate the scalar into all four channels
+    write_imagef(dst, at, (float4)(mean));
+}
+
+
+
+// Empty stub: takes no arguments and does nothing.
+__kernel void hist()
+{
+    
+}
@@ -0,0 +1,130 @@
+/*
+ ~ copyright (c) 2011 dviid
+ ~ contact: dviid@labs.ciid.dk 
+ 
+ + redistribution and use in source and binary forms, with or without
+ + modification, are permitted provided that the following conditions
+ + are met:
+ +  > redistributions of source code must retain the above copyright
+ +    notice, this list of conditions and the following disclaimer.
+ +  > redistributions in binary form must reproduce the above copyright
+ +    notice, this list of conditions and the following disclaimer in
+ +    the documentation and/or other materials provided with the
+ +    distribution.
+ 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ + AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ + SUCH DAMAGE.
+ 
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+// Number of columns the scan kernel processes per loop iteration.
+#define WORK_G_SIZE         64
+// Size of the zero-filled padding in front of the scan buffers.
+#define HALF_WORK_G_SIZE    (WORK_G_SIZE / 2)
+
+// Pixel-exact sampler: unnormalised coordinates, nearest filtering,
+// out-of-range coordinates clamped to the closest edge pixel.
+const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+/*
+ * Inclusive prefix sum along each row: dst(x, y).x = sum of src(0..x, y).x.
+ *
+ * The row is processed in blocks of WORK_G_SIZE columns. Each block is
+ * scanned in local memory in six doubling steps (offsets 1, 2, ..., 32) that
+ * ping-pong between data0 and data1; the first HALF_WORK_G_SIZE entries of
+ * both buffers stay 0 so that reads at X - offset never need a bounds test.
+ * max_val carries the total of the blocks already processed.
+ *
+ *  src, dst  input / output images (only .x is used)
+ *  rows      not used
+ *  cols      number of columns to scan
+ *
+ * NOTE(review): the indexing assumes one work-group of WORK_G_SIZE x 1
+ * work-items per row, so that get_global_id(0) is also the index inside the
+ * group - confirm against the host launch.
+ */
+__kernel void scan(read_only image2d_t src, write_only image2d_t dst, int rows, int cols)
+{
+    __local float data0[WORK_G_SIZE + HALF_WORK_G_SIZE];
+    __local float data1[WORK_G_SIZE + HALF_WORK_G_SIZE];
+
+    int2 coords = (int2) (get_global_id(0), get_global_id(1));
+    int X = coords.x;
+    int Y = coords.y;   // row handled by this work-item
+
+    // zero the padding in front of both buffers
+    if(coords.x < HALF_WORK_G_SIZE) {
+        data0[coords.x] = 0.0f;
+        data1[coords.x] = 0.0f;
+    }
+
+    X += HALF_WORK_G_SIZE;
+    float max_val = 0.0f;
+
+    // number of blocks, rounded up
+    int it = cols / WORK_G_SIZE;
+    if(cols % WORK_G_SIZE != 0) {
+        it++;
+    }
+
+    for(int i = 0; i < it; i++) {
+
+        int col_offset = i * WORK_G_SIZE + coords.x;
+
+        data0[X] = read_imagef(src, smp, (int2)(col_offset, Y)).x;
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // 1
+        data1[X] = data0[X] + data0[X-1];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // 2
+        data0[X] = data1[X] + data1[X-2];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // 4
+        data1[X] = data0[X] + data0[X-4];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // 8
+        data0[X] = data1[X] + data1[X-8];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // 16
+        data1[X] = data0[X] + data0[X-16];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // 32
+        data0[X] = data1[X] + data1[X-32];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        if(col_offset < cols) {
+            write_imagef(dst, (int2)(col_offset, Y), (float4)(data0[X] + max_val, 0.0f, 0.0f, 0.0f));
+        }
+
+        // the last entry holds the total of this block
+        max_val += data0[WORK_G_SIZE + HALF_WORK_G_SIZE - 1];
+
+        // every work-item must have read the total before the next
+        // iteration overwrites data0
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
+
+/*
+ * Tiled transpose of the .x channel: dst(y, x) = src(x, y).
+ *
+ * Each work-group loads one 16 x 16 tile of src into local memory and writes
+ * it, mirrored along its diagonal, to the tile of dst whose tile coordinates
+ * are swapped.
+ *
+ *  src, dst    input / output images
+ *  rows, cols  bounds applied to the output coordinates (row < rows,
+ *              column < cols)
+ *
+ * Indices inside the tile come from get_local_id and tile coordinates from
+ * get_group_id; indexing buff with global ids would run past its 256 entries.
+ *
+ * NOTE(review): requires a 16 x 16 work-group, and treats rows / cols as the
+ * size of dst - confirm against the host launch.
+ */
+__kernel void transpose(read_only image2d_t src, write_only image2d_t dst, int rows, int cols)
+{
+    __local float buff[256];
+
+    // position inside the tile and position of the tile
+    int lX = get_local_id(0);
+    int lY = get_local_id(1);
+    int tileX = get_group_id(0);
+    int tileY = get_group_id(1);
+
+    int ginX = tileX * 16 + lX;
+    int ginY = tileY * 16 + lY;
+
+    buff[lY * 16 + lX] = read_imagef(src, smp, (int2)(ginX, ginY)).x;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    int outRows = rows;
+    int outCols = cols;
+
+    // the tile lands at swapped tile coordinates
+    int goutX = tileY * 16 + lX;
+    int goutY = tileX * 16 + lY;
+
+    if(goutX >= 0 && goutX < outCols && goutY >= 0 && goutY < outRows) {
+        // reading buff with swapped indices mirrors the tile contents
+        write_imagef(dst, (int2)(goutX, goutY), (float4)(buff[lX * 16 + lY], 0.0f, 0.0f, 0.0f));
+    }
+}
+
@@ -0,0 +1,18 @@
+#version 120
+
+// Selects the output mode; main() only distinguishes the value 4.
+uniform int           algo;
+// Source texture, sampled at gl_TexCoord[0].st.
+uniform sampler2DRect tex0;
+
+// Fragment stage. When algo is 4 every fragment is drawn as half-transparent
+// white (a greyscale output of 0.3 R + 0.59 G + 0.11 B was computed here but
+// is disabled); for any other value the texel of tex0 is passed through.
+// Literals carry no 'f' suffix, which GLSL 1.20 does not define.
+void main() {
+
+    if(algo == 4) {
+        gl_FragColor = vec4(1.0, 1.0, 1.0, 0.5);
+        return;
+    }
+
+    gl_FragColor = texture2DRect(tex0, gl_TexCoord[0].st);
+
+}
@@ -0,0 +1,127 @@
+#version 120
+#extension GL_EXT_geometry_shader4 : enable
+
+// Values of the `algo` uniform. main() derives the vertex depth from:
+// brightness for ALGO_1, hue for ALGO_2, saturation for ALGO_3 and
+// brightness for ALGO_4 (which also drops primitives whose first vertex
+// has a y that is not a multiple of 2).
+#define ALGO_1  1
+#define ALGO_2  2
+#define ALGO_3  3
+#define ALGO_4  4
+
+// Depth source selector, see the ALGO_* values.
+uniform int             algo;
+// Factor applied to the depth before it becomes the z coordinate.
+uniform float           scale;
+// Texture sampled once per vertex to obtain the colour.
+uniform sampler2DRect   tex0;
+
+// Largest of the three components of rgb.
+float max_color(vec3 rgb)
+{
+    float top = rgb.r;
+    if(rgb.g > top) {
+        top = rgb.g;
+    }
+    if(rgb.b > top) {
+        top = rgb.b;
+    }
+    return top;
+}
+
+// Smallest of the three components of rgb.
+float min_color(vec3 rgb)
+{
+    float bottom = rgb.r;
+    if(rgb.g < bottom) {
+        bottom = rgb.g;
+    }
+    if(rgb.b < bottom) {
+        bottom = rgb.b;
+    }
+    return bottom;
+}
+
+// Hue of color.rgb, written to h on a 0..255 scale.
+// A colour whose three channels are equal has no hue: h is set to 0 and the
+// function returns at once, because the formulas below divide by
+// (largest - smallest), which is 0 in that case.
+void hue(in vec4 color, out float h)
+{
+    float hi = max_color(color.rgb);
+    float lo = min_color(color.rgb);
+    if(hi == lo) {
+        h = 0.0;
+        return;
+    }
+
+    float span = hi - lo;
+
+    // position on the colour wheel in sixths, chosen by the dominant channel
+    float hueSixth;
+    if(color.r == hi) {
+        hueSixth = (color.g - color.b) / span;
+        if(hueSixth < 0.0)
+            hueSixth += 6.0;
+    } else if (color.g == hi) {
+        hueSixth = 2.0 + (color.b - color.r) / span;
+    } else {
+        hueSixth = 4.0 + (color.r - color.g) / span;
+    }
+
+    h = 255.0 * hueSixth / 6.0;
+}
+
+// Saturation of color.rgb, written to s on a 0..255 scale:
+// 255 * (largest - smallest) / largest.
+// A colour whose three channels are equal has saturation 0; returning at
+// once also avoids dividing 0 by 0 for black.
+void saturation(in vec4 color, out float s)
+{
+    float hi = max_color(color.rgb);
+    float lo = min_color(color.rgb);
+    if(hi == lo) {
+        s = 0.0;
+        return;
+    }
+
+    s = 255.0 * (hi - lo) / hi;
+}
+
+// Writes the largest of the r, g, b channels of color to b, unscaled.
+void brightness(in vec4 color, out float b)
+{
+    float peak = max_color(color.rgb);
+    b = peak;
+}
+
+// Converts rgb to hue / saturation / brightness, each on a 0..255 scale:
+//   hsb.x  hue
+//   hsb.y  saturation, 255 * (largest - smallest) / largest
+//   hsb.z  brightness, 255 * largest
+// A colour whose three channels are equal has hue 0 and saturation 0.
+// Brightness uses the same 255 scale on both paths.
+void toHSB(in vec3 rgb, out vec3 hsb)
+{
+    float hi = max_color(rgb);
+    float lo = min_color(rgb);
+
+    hsb.z = 255.0 * hi;
+
+    if(hi == lo) {
+        hsb.x = 0.0;
+        hsb.y = 0.0;
+        return;
+    }
+
+    float span = hi - lo;
+
+    // position on the colour wheel in sixths, chosen by the dominant channel
+    float hueSixth;
+    if(rgb.r == hi) {
+        hueSixth = (rgb.g - rgb.b) / span;
+        if(hueSixth < 0.0)
+            hueSixth += 6.0;
+    } else if (rgb.g == hi) {
+        hueSixth = 2.0 + (rgb.b - rgb.r) / span;
+    } else {
+        hueSixth = 4.0 + (rgb.r - rgb.g) / span;
+    }
+
+    hsb.x = 255.0 * hueSixth / 6.0;
+    hsb.y = 255.0 * span / hi;
+}
+
+// Geometry stage: re-emits every input vertex with a z coordinate derived
+// from the colour of tex0 at the vertex's texture coordinate.
+//   algo == ALGO_2  depth = hue
+//   algo == ALGO_3  depth = saturation
+//   otherwise       depth = brightness (ALGO_1, ALGO_4 and unknown values)
+// With ALGO_4 nothing is emitted when the y of the first input vertex is not
+// a multiple of 2.
+// The emitted position is gl_ModelViewProjectionMatrix applied to
+// (x, y, depth * scale, 1); texture coordinate 0 is passed through.
+void main()
+{
+
+    if(algo == ALGO_4 && mod(gl_PositionIn[0].y, 2.0) != 0.0) return;
+
+    for (int i = 0; i < gl_VerticesIn; i++)
+    {
+        vec4 inPos = gl_PositionIn[i];
+        vec4 color0 = texture2DRect(tex0, gl_TexCoordIn[i][0].st);
+
+        float depth = 0.0;
+
+        if(algo == ALGO_2) {
+            hue(color0, depth);
+        }
+        else if(algo == ALGO_3) {
+            saturation(color0, depth);
+        }
+        else {
+            brightness(color0, depth);
+        }
+
+        gl_Position = gl_ModelViewProjectionMatrix * vec4(inPos.x, inPos.y, depth * scale, 1.0);
+        gl_TexCoord[0] = gl_TexCoordIn[i][0];
+
+        EmitVertex();
+    }
+
+}
@@ -0,0 +1,11 @@
+#version 120
+
+// Pass-through vertex stage: the incoming vertex is forwarded without any
+// transformation, together with its colour and its first texture coordinate.
+void main()
+{
+    // position is left untransformed
+    gl_Position = gl_Vertex;
+
+    // attributes forwarded as they are
+    gl_TexCoord[0] = gl_MultiTexCoord0;
+    gl_FrontColor = gl_Color;
+}