Problem w/ convolution and filter size

Foxbat · March 27, 2007, 9:24pm

I use this shader to do convolution. It seems to work for the most part. However, when the kernel size gets up to ~70, the output becomes corrupted. Any ideas why?

( nvidia 7800 GTX. Driver version 93.71 )

( And yes, ‘kernel_weight’ can be factored out, but not in the final version of the shader… )

  
// Texture-coordinate step between adjacent kernel taps along s and t
// when sampling filter_kernel.
uniform float kernel_ds;
uniform float kernel_dt;
// Loop bound for the tap loops: offsets 1 .. kernel_size - 1 are visited on
// each side of the center, so this counts taps per half-axis including the
// center tap.
uniform int kernel_size;
// Kernel weights; only the red channel is read.
uniform sampler2D filter_kernel;

// Texture-coordinate step between adjacent image samples along s and t
// (presumably 1/width and 1/height of base_texture -- confirm with the host code).
uniform float image_ds;
uniform float image_dt;
// Image being convolved; only its rgb channels are read.
// NOTE(review): `extern` is a reserved word in GLSL; presumably a host-side
// preprocessor strips or rewrites it before compilation -- confirm.
extern uniform sampler2D base_texture;

// Convolves base_texture around image_tex_coord_in with the weights stored in
// the red channel of filter_kernel.
//
// Only one quadrant of the kernel is sampled: the weight fetched at kernel
// offset ( j, i ) is applied to the four image taps at ( +/-j, +/-i ), so the
// kernel is treated as mirror-symmetric about both axes.
//
// color              - receives vec4( accumulated rgb, 1.0 ); its incoming
//                      value is not read.
// image_tex_coord_in - texture coordinate of the center tap in base_texture.
//
// Texture fetches per call: 1 + 5 * ( kernel_size - 1 )
//                             + 5 * ( kernel_size - 1 )^2.
// NOTE(review): the cost grows quadratically with kernel_size; very large
// kernels may run into hardware limits on executed instructions -- confirm
// against the target GPU.
void convolve_func( inout vec4 color, in vec2 image_tex_coord_in )
{
	int i, j;
	vec2 kernel_tex_coord;
	vec3 color_result;
	float kernel_weight, tf_s, tf_t;

	// Center pixel
	kernel_weight = texture2D( filter_kernel, vec2( 0.0, 0.0 ) ).r;
	color_result = texture2D( base_texture, image_tex_coord_in ).rgb * kernel_weight;

	// x == 0 and y == 0 axes - excluding center
	for ( i = 1; i < kernel_size; ++i ) {

		// NOTE(review): the weight is read along the kernel's s axis and is
		// reused for the vertical taps as well; that is only right when the
		// kernel's t-axis weights are the same -- confirm.
		// Explicit float() cast: GLSL 1.10 has no implicit int -> float conversion.
		kernel_tex_coord = vec2( float( i ) * kernel_ds, 0.0 );
		kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

		tf_s = float( i ) * image_ds;
		// Vertical offset must use the t step, not the s step.
		tf_t = float( i ) * image_dt;

		// Each tap builds its full coordinate from image_tex_coord_in so that
		// no component is left undefined or stale from a previous tap.

		// Right
		color_result += texture2D( base_texture, image_tex_coord_in + vec2( tf_s, 0.0 ) ).rgb * kernel_weight;

		// Left
		color_result += texture2D( base_texture, image_tex_coord_in - vec2( tf_s, 0.0 ) ).rgb * kernel_weight;

		// Top
		color_result += texture2D( base_texture, image_tex_coord_in + vec2( 0.0, tf_t ) ).rgb * kernel_weight;

		// Bottom
		color_result += texture2D( base_texture, image_tex_coord_in - vec2( 0.0, tf_t ) ).rgb * kernel_weight;

	}

	// Off-axis taps: one kernel fetch per ( j, i ), mirrored into all four quadrants
	for ( i = 1; i < kernel_size; ++i ) {

		kernel_tex_coord.t = kernel_dt * float( i );
		tf_t = float( i ) * image_dt;

		for ( j = 1; j < kernel_size; ++j ) {

			kernel_tex_coord.s = kernel_ds * float( j );
			kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

			tf_s = float( j ) * image_ds;

			// Above right
			color_result += texture2D( base_texture, image_tex_coord_in + vec2( tf_s, tf_t ) ).rgb * kernel_weight;

			// Above left
			color_result += texture2D( base_texture, image_tex_coord_in + vec2( -tf_s, tf_t ) ).rgb * kernel_weight;

			// Below right
			color_result += texture2D( base_texture, image_tex_coord_in + vec2( tf_s, -tf_t ) ).rgb * kernel_weight;

			// Below left
			color_result += texture2D( base_texture, image_tex_coord_in - vec2( tf_s, tf_t ) ).rgb * kernel_weight;

		}
	}

	color = vec4( color_result, 1.0 );

}

jra101 · March 28, 2007, 12:22pm

Most likely the for loops end up executing more than GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV instructions, and at that point the GPU sets the final color to whatever is in the R0 register (likely the result of whatever the last instruction issued was).

system · October 19, 2021, 7:39pm

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.