Problem w/ convolution and filter size

I use this shader to do convolution. It seems to work for the most part. However, when the kernel size gets up to about 70, the output becomes corrupted. Any ideas why?

( nvidia 7800 GTX. Driver version 93.71 )

( And yes, ‘kernel_weight’ can be factored, but not in the final version of the shader… )

  
// Filter kernel inputs.
// kernel_ds / kernel_dt: step in s / t between successive taps when reading
// weights from filter_kernel.
// kernel_size: exclusive upper bound of the tap loops; taps 1..kernel_size-1
// are taken on each side of the center, so the filter spans
// (2 * kernel_size - 1) taps per axis.
// filter_kernel: texture whose red channel holds the weights; the center
// weight is read at coordinate (0, 0).
uniform float kernel_ds;
uniform float kernel_dt;
uniform int kernel_size;
uniform sampler2D filter_kernel;

// Source image inputs.
// image_ds / image_dt: step in s / t between successive taps in base_texture
// (presumably one texel, i.e. 1/width and 1/height -- confirm with the host code).
// base_texture: image being convolved.
// `extern` was removed from this declaration: it is a reserved keyword in GLSL
// and uniforms are already shared by name across linked shader objects.
uniform float image_ds;
uniform float image_dt;
uniform sampler2D base_texture;

// Convolves base_texture around image_tex_coord_in with the weights stored in
// the red channel of filter_kernel.
//
// color               (inout) receives vec4( weighted rgb sum, 1.0 ); the
//                     incoming value is not read.
// image_tex_coord_in  texture coordinate of the center tap in base_texture.
//
// Only one set of weights is fetched per mirrored group of taps: the axis
// taps at distance i all use the weight at ( i * kernel_ds, 0 ), and the four
// diagonal taps at ( +-j, +-i ) all use the weight at
// ( j * kernel_ds, i * kernel_dt ). The result is therefore only correct for
// a kernel that is mirror-symmetric in both axes and whose s-axis and t-axis
// profiles are identical.
//
// The weights are not normalized here; the sum is written out as-is.
//
// NOTE(review): the number of texture fetches grows with kernel_size squared
// ( roughly 5 * kernel_size^2 ). Very large kernels can exceed the number of
// instructions a fragment program is allowed to execute on some hardware,
// which shows up as corrupted output -- confirm the limit for the target GPU.
void convolve_func( inout vec4 color, in vec2 image_tex_coord_in )
{
	int i, j;
	vec2 image_tex_coord, kernel_tex_coord;
	vec3 color_result;
	float kernel_weight, tf_s, tf_t;

	// Center pixel
	kernel_tex_coord = vec2( 0.0, 0.0 );
	kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;
	color_result = texture2D( base_texture, image_tex_coord_in ).rgb * kernel_weight;

	// x == 0 and y == 0 axes - excluding center
	for ( i = 1; i < kernel_size; ++i ) {

		// Explicit float() conversion: GLSL 1.10 has no implicit int -> float.
		// kernel_tex_coord.t is still 0.0 here, so this reads the kernel's axis row.
		kernel_tex_coord.s = float( i ) * kernel_ds;
		kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

		tf_s = float( i ) * image_ds;
		// Vertical offset must use the vertical step ( was image_ds ).
		tf_t = float( i ) * image_dt;

		// Horizontal taps stay on the center row. Without this reset, t would be
		// undefined on the first pass and stuck on the previous "Bottom" row after.
		image_tex_coord.t = image_tex_coord_in.t;

		// Right
		image_tex_coord.s = image_tex_coord_in.s + tf_s;
		color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

		// Left
		image_tex_coord.s = image_tex_coord_in.s - tf_s;
		color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

		// Vertical taps stay on the center column.
		image_tex_coord.s = image_tex_coord_in.s;

		// Top
		image_tex_coord.t = image_tex_coord_in.t + tf_t;
		color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

		// Bottom
		image_tex_coord.t = image_tex_coord_in.t - tf_t;
		color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

	}

	// Off-axis taps: one kernel fetch is shared by the four mirrored positions.
	for ( i = 1; i < kernel_size; ++i ) {

		tf_t = float( i ) * image_dt;
		kernel_tex_coord.t = float( i ) * kernel_dt;

		for ( j = 1; j < kernel_size; ++j ) {

			// Computed directly from j rather than accumulated, so rounding
			// error does not build up across a wide kernel row.
			kernel_tex_coord.s = float( j ) * kernel_ds;
			kernel_weight = texture2D( filter_kernel, kernel_tex_coord ).r;

			tf_s = float( j ) * image_ds;

			// Pixels above
			image_tex_coord.t = image_tex_coord_in.t + tf_t;

			// Above right
			image_tex_coord.s = image_tex_coord_in.s + tf_s;
			color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

			// Above left
			image_tex_coord.s = image_tex_coord_in.s - tf_s;
			color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

			// Pixels below
			image_tex_coord.t = image_tex_coord_in.t - tf_t;

			// Below right
			image_tex_coord.s = image_tex_coord_in.s + tf_s;
			color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

			// Below left
			image_tex_coord.s = image_tex_coord_in.s - tf_s;
			color_result += texture2D( base_texture, image_tex_coord ).rgb * kernel_weight;

		}
	}

	color = vec4( color_result, 1.0 );

}

Most likely the for loops end up executing more than GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV instructions. At that point, the GPU sets the final color to whatever is in the R0 register (likely the result of the last instruction issued).

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.