I use this shader to do convolution. It seems to work for the most part; however, when the kernel size gets up to ~70, the output becomes corrupted. Any ideas why?
( nvidia 7800 GTX. Driver version 93.71 )
( And yes, ‘kernel_weight’ can be factored out here, but not in the final version of the shader… )
// Step between adjacent kernel texels along s and t, in filter_kernel
// texture coordinates.
uniform float kernel_ds;
uniform float kernel_dt;
// Exclusive upper bound of the per-axis tap offsets: the loops in
// convolve_func run over offsets 1 .. kernel_size - 1.
uniform int kernel_size;
// Filter weights; only the red channel is read.
uniform sampler2D filter_kernel;
// Step between adjacent image texels along s and t, in base_texture
// texture coordinates.
uniform float image_ds;
uniform float image_dt;
// Image being filtered. `extern` is a reserved word in GLSL and is rejected by
// conforming compilers; a uniform with the same name and type is already
// shared by every shader object linked into the program, so plain `uniform`
// is sufficient.
uniform sampler2D base_texture;
// Convolves base_texture around image_tex_coord_in with the weights stored in
// the red channel of filter_kernel.
//
//   color               - receives the filtered RGB; alpha is set to 1.0.
//                         The incoming value is not read.
//   image_tex_coord_in  - texture coordinate of the center tap in base_texture.
//
// Only kernel texels at non-negative offsets (0 .. kernel_size - 1 along each
// axis) are fetched; every weight is applied to the mirrored image taps, so
// the kernel is treated as symmetric about both axes. The axis loop also uses
// the weight at (i, 0) for the vertical taps at (0, +/-i), i.e. it assumes
// kernel(i, 0) == kernel(0, i).
//
// NOTE(review): the quadrant loop performs 4 * (kernel_size - 1)^2 image
// fetches per fragment. Presumably the corruption seen near kernel_size ~70 is
// the fragment program exceeding the GPU's executed-instruction limit -
// confirm against the hardware limits; splitting the filter into several
// passes (or a separable form) would avoid it.
void convolve_func( inout vec4 color, in vec2 image_tex_coord_in )
{
    int i, j;
    vec3 color_result;
    float kernel_weight, tf_s, tf_t;

    // Center pixel
    kernel_weight = texture2D( filter_kernel, vec2( 0.0, 0.0 ) ).r;
    color_result = texture2D( base_texture, image_tex_coord_in ).rgb * kernel_weight;

    // x == 0 and y == 0 axes - excluding center
    for ( i = 1; i < kernel_size; ++i ) {
        // Explicit float() conversion: GLSL 1.10 has no implicit int -> float.
        kernel_weight = texture2D( filter_kernel, vec2( float( i ) * kernel_ds, 0.0 ) ).r;

        tf_s = float( i ) * image_ds;
        tf_t = float( i ) * image_dt; // vertical step uses image_dt, not image_ds

        // Each tap is built from the center coordinate plus an explicit
        // offset, so the horizontal taps never inherit a stale t value.
        // Right
        color_result += texture2D( base_texture, image_tex_coord_in + vec2( tf_s, 0.0 ) ).rgb * kernel_weight;
        // Left
        color_result += texture2D( base_texture, image_tex_coord_in - vec2( tf_s, 0.0 ) ).rgb * kernel_weight;
        // Top
        color_result += texture2D( base_texture, image_tex_coord_in + vec2( 0.0, tf_t ) ).rgb * kernel_weight;
        // Bottom
        color_result += texture2D( base_texture, image_tex_coord_in - vec2( 0.0, tf_t ) ).rgb * kernel_weight;
    }

    // Off-axis taps: one kernel fetch per (j, i) feeds all four quadrants.
    for ( i = 1; i < kernel_size; ++i ) {
        tf_t = float( i ) * image_dt;

        for ( j = 1; j < kernel_size; ++j ) {
            tf_s = float( j ) * image_ds;

            // Computed directly from j rather than by repeated addition, so
            // rounding error does not accumulate across the row.
            kernel_weight = texture2D( filter_kernel,
                                       vec2( float( j ) * kernel_ds, float( i ) * kernel_dt ) ).r;

            // Above right
            color_result += texture2D( base_texture, image_tex_coord_in + vec2(  tf_s,  tf_t ) ).rgb * kernel_weight;
            // Above left
            color_result += texture2D( base_texture, image_tex_coord_in + vec2( -tf_s,  tf_t ) ).rgb * kernel_weight;
            // Below right
            color_result += texture2D( base_texture, image_tex_coord_in + vec2(  tf_s, -tf_t ) ).rgb * kernel_weight;
            // Below left
            color_result += texture2D( base_texture, image_tex_coord_in + vec2( -tf_s, -tf_t ) ).rgb * kernel_weight;
        }
    }

    color = vec4( color_result, 1.0 );
}