PDA

View Full Version : Problem w/ convolution and filter size



Foxbat
03-27-2007, 09:24 PM
I use this shader to do convolution. It seems to work for the most part. However, when the kernel size gets up to ~70, the output becomes corrupted. Any ideas why?

( nvidia 7800 GTX. Driver version 93.71 )

( And yes, 'kernel_weight' can be factored out here, but that will not be possible in the final version of the shader. )


// Step between neighbouring kernel taps in filter_kernel texture coordinates
// (multiplied by the tap index to address the kernel texture).
uniform float kernel_ds;
uniform float kernel_dt;
// Exclusive upper bound of the tap loops: taps 1 .. kernel_size - 1 are
// visited on each side of the centre.
uniform int kernel_size;
// Kernel weights; only the red channel is read.
uniform sampler2D filter_kernel;

// Step between neighbouring image samples in base_texture texture coordinates.
uniform float image_ds;
uniform float image_dt;
// Image being convolved. `extern` is a keyword reserved for future use in
// GLSL and is rejected by strict compilers, so it is not used here; uniforms
// are already shared across the shader objects linked into one program.
uniform sampler2D base_texture;

// Convolves base_texture with the kernel stored in filter_kernel, centred on
// image_tex_coord_in, and writes the result to `color` with alpha set to 1.0.
//
// Parameters:
//   color              - receives the filtered RGB; its incoming value is not read.
//   image_tex_coord_in - texture coordinate of the centre sample in base_texture.
//
// Only one quadrant of the kernel is read: the weight fetched at kernel tap
// (j, i) is applied to the four image samples at (+/-j, +/-i), so the kernel
// is treated as mirror-symmetric about both axes. Weights come from the red
// channel of filter_kernel. No normalisation is applied here.
//
// NOTE(review): the number of image fetches per fragment is
// (2 * kernel_size - 1)^2, i.e. it grows quadratically with kernel_size.
// On hardware that caps the number of instructions executed per fragment,
// large kernels may exceed that cap -- confirm against the target GPU.
void convolve_func( inout vec4 color, in vec2 image_tex_coord_in )
{
    int i, j;
    vec2 image_tex_coord, kernel_tex_coord;
    vec3 color_result;
    float kernel_weight, tf_s, tf_t;

    // Centre pixel
    kernel_tex_coord = vec2( 0.0, 0.0 );
    kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;
    color_result = texture2D( base_texture, image_tex_coord_in ).rgb * kernel_weight;

    // Taps on the two axes through the centre (centre itself excluded)
    for ( i = 1; i < kernel_size; ++i ) {

        // Explicit float() conversions: GLSL 1.10 has no implicit int -> float.
        tf_s = float( i ) * image_ds;
        tf_t = float( i ) * image_dt;   // vertical step uses image_dt, not image_ds

        // Horizontal axis: kernel tap (i, 0)
        kernel_tex_coord = vec2( float( i ) * kernel_ds, 0.0 );
        kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

        // Stay on the centre row; .t would otherwise be undefined on the
        // first pass and left at the "Bottom" offset on later passes.
        image_tex_coord.t = image_tex_coord_in.t;

        // Right
        image_tex_coord.s = image_tex_coord_in.s + tf_s;
        color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

        // Left
        image_tex_coord.s = image_tex_coord_in.s - tf_s;
        color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

        // Vertical axis: kernel tap (0, i). Fetched separately so a kernel
        // that differs between its two axes is still weighted correctly.
        kernel_tex_coord = vec2( 0.0, float( i ) * kernel_dt );
        kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

        // Stay on the centre column.
        image_tex_coord.s = image_tex_coord_in.s;

        // Top
        image_tex_coord.t = image_tex_coord_in.t + tf_t;
        color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

        // Bottom
        image_tex_coord.t = image_tex_coord_in.t - tf_t;
        color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;
    }

    // Off-axis taps: one kernel fetch per (j, i), mirrored into all four quadrants
    for ( i = 1; i < kernel_size; ++i ) {

        tf_t = float( i ) * image_dt;
        kernel_tex_coord.t = float( i ) * kernel_dt;

        for ( j = 1; j < kernel_size; ++j ) {

            tf_s = float( j ) * image_ds;

            // Computed from the index rather than accumulated, so rounding
            // error does not build up across a long row.
            kernel_tex_coord.s = float( j ) * kernel_ds;
            kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

            // Row above the centre
            image_tex_coord.t = image_tex_coord_in.t + tf_t;

            // Above right
            image_tex_coord.s = image_tex_coord_in.s + tf_s;
            color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

            // Above left
            image_tex_coord.s = image_tex_coord_in.s - tf_s;
            color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

            // Row below the centre
            image_tex_coord.t = image_tex_coord_in.t - tf_t;

            // Below right
            image_tex_coord.s = image_tex_coord_in.s + tf_s;
            color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

            // Below left
            image_tex_coord.s = image_tex_coord_in.s - tf_s;
            color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;
        }
    }

    color = vec4( color_result, 1.0 );
}

jra101
03-28-2007, 12:22 PM
Most likely the for loops end up executing more than GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV instructions, and at that point the GPU sets the final color to whatever is in the R0 register (likely the result of the last instruction issued).