PCSS shader runs in software mode?

Hello all

I ported the Percentage-Closer Soft Shadows HLSL demo shader from NVIDIA GDC 2005 presentations to GLSL. When I run it in my app, the app goes into halt and can only be closed from task manager. When tried it in ShaderDesigner, it reports that the shader is run in software due to unsupported language element used - so that’s obviously the culprit. I’m having difficulties pointing out the offending code.

I have ATI Radeon 9700 with newest cat 3.9 drivers installed. I’d be really grateful if someone could help me point out that language element. I suspect it could be the for loops, but then again, I’ve seen them in some ATI demo shaders and it worked there. Here’s the code for both shaders:

[Vertex shader]
/**
 * PCSS lighting shader. Ported from R. Fernando's
 * HLSL shader.
 */
uniform mat4 lightMVP;

varying vec4 lightClipPos; // Vertex position in light's clip space
varying vec3 lVecTBN;      // Light vector in TBN space
varying vec3 vVecTBN;      // View vector in TBN space
varying vec2 shadowMapUV;

void main()
{
	vec4 Po = vec4(gl_Vertex.xyz, 1.0);       // Pos in object space
	vec4 Pe = gl_ModelViewMatrix * gl_Vertex; // Pos in eye space
	vec4 Pl = lightMVP * Pe;                  // Pos in light space

	Pl.z -= 0.01; // Shadow bias

	gl_Position    = ftransform();
	gl_TexCoord[0] = gl_MultiTexCoord0;
	lightClipPos   = Pl;

	// Calculate Light, View vectors and tangent basis vectors in eye space
	vec3 L = gl_LightSource[0].position.xyz - Pe.xyz;
	vec3 V = -Pe.xyz;

	vec3 eyeNormal = gl_NormalMatrix * gl_Normal;
	vec3 eyeTangent = gl_NormalMatrix * tangent;
	vec3 eyeBinormal = gl_NormalMatrix * -binormal;
	
	// Transform Light and View vectors into tangent space
	lVecTBN = vec3(dot(L, eyeTangent), dot(L, eyeBinormal), dot(L, eyeNormal));
	vVecTBN = vec3(dot(V, eyeTangent), dot(V, eyeBinormal), dot(V, eyeNormal));

	// Convert coordinates from [-1..1] range to [0..1] range, perform homogenous division.
	shadowMapUV = vec2(0.5, -0.5) * (Pl.xy/Pl.w) + vec2(0.5, 0.5);
}

[Fragment shader]
uniform sampler2D BaseMap;
uniform sampler2D NormalMap;
uniform sampler2D ShadowMap;
uniform sampler2D Spot;

varying vec4 lightClipPos;   // Position in light's clip space
varying vec3 lVecTBN;        // Light vector in TBN
varying vec3 vVecTBN;        // View vector
varying vec2 shadowMapUV;

float FindBlocker(vec2 uv, vec4 Lpos, float searchWidth, float numSamples);
float EstimatePenumbra(vec2 uv, vec4 Lpos, float blocker, float lightSize);
float PCF(vec2 uv, vec4 Lpos, float filterWidth, float numSamples);

/*
 * Fragment shader main
 */
void main()
{
	vec3 L = normalize(lVecTBN);
	vec3 V = normalize(vVecTBN);
	
	vec2 uv = shadowMapUV;

	///////////////////////////////////////////////////
	// STEP 1: Blocker estimate
	float blocker = FindBlocker(uv, lightClipPos, 1.0, 6.0);
	
	///////////////////////////////////////////////////
	// STEP 2: Penumbra size estimation
	float penumbra = EstimatePenumbra(uv, lightClipPos, blocker, 0.01);
	
	///////////////////////////////////////////////////
	// STEP 3: PCF
	float shadowed;

	if(penumbra > 0.01)
		penumbra = 0.01;

	shadowed = PCF(uv, lightClipPos, penumbra, 6.0);

	// If no blocker, return 1.0 since the point isn't in shadow
	if(blocker == 0.0)
		shadowed = 1.0;

	// Final color
	vec4 lightMap = texture2D(Spot, shadowMapUV.xy);
	vec4 material = texture2D(BaseMap, gl_TexCoord[0].xy);
	
	// light diffuse color * light intensity * spot texture sample
	lightMap = gl_LightSource[0].diffuse * gl_LightSource[0].diffuse.w * lightMap;
	
	float result = shadowed;
	gl_FragColor = vec4(result, result, result, 1.0) * material * lightMap;
}

/*
 * Search for potential blockers
 */
float FindBlocker(vec2 uv, vec4 Lpos, float searchWidth, float numSamples)
{
	float stepSize = 2.0 * searchWidth/numSamples;
	
	// Starting point uv coords for search
	uv = uv - vec2(searchWidth, searchWidth);

	float blockerSum = 0.0;
	float blockerCount = 0.0;
	float receiver = Lpos.z;

        // iterate through search region and add up depth values
        for(float i = 0.0; i < numSamples; i += 1.0) // FIXME
	{
               for(float j = 0.0; j < numSamples; j += 1.0) // FIXME
	       {
                       float shadMapDepth = texture2D(ShadowMap, uv + vec2(i * stepSize, j * stepSize)).x;

                       // found a blocker
                       if(shadMapDepth < receiver)
		       {
                               blockerSum += shadMapDepth;
                               blockerCount += 1.0;
                       }
               }
        }

	float result = blockerSum/blockerCount; // FIXME
	return result;
}

/*
 * Function to estimate the shadow penumbra size.
 */
float EstimatePenumbra(vec2 uv, vec4 Lpos, float blocker, float lightSize)
{
       // receiver depth
       float receiver = Lpos.z;

       // estimate penumbra using parallel planes approximation
       float penumbra = (receiver - blocker) * lightSize/blocker;
       
       return penumbra;
}

/*
 * Percentage Closer Filtering with customizable filter kernel size and sample amount.
 */
float PCF(vec2 uv, vec4 Lpos, float filterWidth, float numSamples)
{
       float stepSize = 2.0 * filterWidth/numSamples;

       uv = uv - vec2(filterWidth, filterWidth); // FIXME: why not (uv - filterWidth) ?

       float sum = 0.0;

       // now iterate through the kernel and filter the values
       for(float i = 0.0; i < numSamples; i += 1.0) // FIXME
       {
               for(float j = 0.0; j < numSamples; j += 1.0) // FIXME
	       {
                       // get depth at current texel of the shadow map
                       float shadMapDepth = 0.0;
                       
                       shadMapDepth = texture2D(ShadowMap, uv + vec2(i*stepSize, j*stepSize)).x;

                       // test if the depth in the shadow map is closer than
                       // the eye-view point
                       float shad = Lpos.z < shadMapDepth ? 1.0 : 0.0;

                       // accumulate result
                       sum += shad;
               }
       }
       
       // return average of the samples
       return sum/(numSamples * numSamples);
}

The functions PCF and FindBlocker contain for loops with a non-constant iteration count. AFAIK the 9700 doesn’t support dynamic branch statements, so it must unroll all loops, but that can only be done if the loop count is a compile time constant.

Thanks for the info, Overmind! This should be easily fixable by replacing the numSamples variables in the for statements with constant values (numSamples is known at the compile time, after all).

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.