PDA

View Full Version : PCSS shader runs in software mode?



Arkion
09-23-2005, 11:31 PM
Hello all

I ported the Percentage-Closer Soft Shadows HLSL demo shader from the NVIDIA GDC 2005 presentations to GLSL. When I run it in my app, the app hangs and can only be closed from the task manager. When I tried it in ShaderDesigner, it reported that the shader runs in software because an unsupported language element is used - so that's obviously the culprit. I'm having difficulty pinpointing the offending code.

I have an ATI Radeon 9700 with the newest cat 3.9 drivers installed. I'd be really grateful if someone could help me identify that language element. I suspect it could be the for loops, but then again, I've seen them in some ATI demo shaders, and they worked there. Here's the code for both shaders:


[Vertex shader]
/**
* PCSS lighting shader. Ported from R. Fernando's
* HLSL shader.
*/
// Matrix applied to the eye-space vertex position in main() to obtain the
// position in the light's clip space.
uniform mat4 lightMVP;

// Per-vertex tangent frame. main() multiplies both by gl_NormalMatrix, so they
// must be vec3 inputs; without these declarations the identifiers are undefined
// and the shader does not compile. The application has to supply them as
// generic vertex attributes.
attribute vec3 tangent;
attribute vec3 binormal;

varying vec4 lightClipPos; // Vertex position in light's clip space (depth-biased)
varying vec3 lVecTBN; // Light vector in TBN space
varying vec3 vVecTBN; // View vector in TBN space
varying vec2 shadowMapUV; // Shadow-map lookup coordinates in [0..1]

/*
* Vertex shader main.
*
* Writes the clip-space position and texture coordinate, the vertex position
* in the light's clip space (with a depth bias), the tangent-space light and
* view vectors, and the shadow-map lookup coordinates.
*
* `tangent` and `binormal` are not declared in this function; they must be
* available at file scope as vec3 per-vertex inputs.
*/
void main()
{
    // Vertex position in eye space
    vec4 eyePos = gl_ModelViewMatrix * gl_Vertex;

    // Vertex position in the light's clip space, pushed back by a small
    // depth bias.
    vec4 lightPos = lightMVP * eyePos;
    lightPos.z -= 0.01; // Shadow bias

    gl_Position = ftransform();
    gl_TexCoord[0] = gl_MultiTexCoord0;
    lightClipPos = lightPos;

    // Light and view vectors in eye space
    vec3 L = gl_LightSource[0].position.xyz - eyePos.xyz;
    vec3 V = -eyePos.xyz;

    // Tangent basis in eye space (note the negated binormal)
    vec3 eyeNormal = gl_NormalMatrix * gl_Normal;
    vec3 eyeTangent = gl_NormalMatrix * tangent;
    vec3 eyeBinormal = gl_NormalMatrix * -binormal;

    // Project the light and view vectors onto the tangent basis
    lVecTBN = vec3(dot(L, eyeTangent), dot(L, eyeBinormal), dot(L, eyeNormal));
    vVecTBN = vec3(dot(V, eyeTangent), dot(V, eyeBinormal), dot(V, eyeNormal));

    // Homogeneous division, then remap from [-1..1] to [0..1].
    // NOTE(review): the -0.5 scale flips the Y axis; presumably carried over
    // from the HLSL original - confirm it matches the shadow map's orientation
    // under OpenGL.
    shadowMapUV = vec2(0.5, -0.5) * (lightPos.xy / lightPos.w) + vec2(0.5, 0.5);
}

[Fragment shader]
// Textures bound by the application.
uniform sampler2D BaseMap; // Surface color; sampled with gl_TexCoord[0] in main()
uniform sampler2D NormalMap; // Declared but not sampled anywhere in the visible shader code
uniform sampler2D ShadowMap; // Depth values; the .x channel is read in FindBlocker() and PCF()
uniform sampler2D Spot; // Light mask; sampled with shadowMapUV in main()

// Interpolated values; names and types match the vertex shader's varyings.
varying vec4 lightClipPos; // Position in light's clip space; its .z is used as the receiver depth
varying vec3 lVecTBN; // Light vector in TBN
varying vec3 vVecTBN; // View vector
varying vec2 shadowMapUV; // Lookup coordinates for ShadowMap and Spot

// Forward declarations; the definitions follow main().
float FindBlocker(vec2 uv, vec4 Lpos, float searchWidth, float numSamples);
float EstimatePenumbra(vec2 uv, vec4 Lpos, float blocker, float lightSize);
float PCF(vec2 uv, vec4 Lpos, float filterWidth, float numSamples);

/*
* Fragment shader main
*/
void main()
{
    // The interpolated lVecTBN / vVecTBN vectors are not used by this
    // shader's output, so they are not normalized here.

    vec2 uv = shadowMapUV;

    ///////////////////////////////////////////////////
    // STEP 1: Blocker estimate (search width 1.0, 6 samples per axis)
    float blocker = FindBlocker(uv, lightClipPos, 1.0, 6.0);

    ///////////////////////////////////////////////////
    // STEP 2: Penumbra size estimation (light size 0.01)
    float penumbra = EstimatePenumbra(uv, lightClipPos, blocker, 0.01);

    // Cap the filter width at 0.01
    if(penumbra > 0.01)
        penumbra = 0.01;

    ///////////////////////////////////////////////////
    // STEP 3: PCF (6 samples per axis)
    float shadowed = PCF(uv, lightClipPos, penumbra, 6.0);

    // A blocker depth of exactly 0.0 is treated as "no blocker found":
    // the point is fully lit.
    if(blocker == 0.0)
        shadowed = 1.0;

    // Final color
    vec4 lightMap = texture2D(Spot, shadowMapUV.xy);
    vec4 material = texture2D(BaseMap, gl_TexCoord[0].xy);

    // light diffuse color * light intensity (stored in diffuse.w) * spot texture sample
    lightMap = gl_LightSource[0].diffuse * gl_LightSource[0].diffuse.w * lightMap;

    // The shadow term scales RGB only; alpha comes from material * lightMap.
    gl_FragColor = vec4(shadowed, shadowed, shadowed, 1.0) * material * lightMap;
}

/*
* Search for potential blockers
*/
/*
* Averages the shadow-map depths that lie in front of the receiver.
*
* uv          - centre of the search region in shadow-map coordinates
* Lpos        - position in the light's clip space; Lpos.z is the receiver depth
* searchWidth - half-width of the square search region
* numSamples  - samples taken along each axis (numSamples * numSamples lookups)
*
* Returns the mean depth of all samples closer to the light than the receiver,
* or exactly 0.0 when no sample is closer (no blocker).
*/
float FindBlocker(vec2 uv, vec4 Lpos, float searchWidth, float numSamples)
{
    float stepSize = 2.0 * searchWidth/numSamples;

    // Starting point uv coords for search (corner of the search region)
    uv = uv - vec2(searchWidth, searchWidth);

    float blockerSum = 0.0;
    float blockerCount = 0.0;
    float receiver = Lpos.z;

    // Iterate through the search region and add up blocker depth values.
    // NOTE: the loop bounds depend on the numSamples parameter. Hardware
    // without dynamic flow control has to unroll these loops, which requires
    // the trip count to be known at compile time.
    for(float i = 0.0; i < numSamples; i += 1.0)
    {
        for(float j = 0.0; j < numSamples; j += 1.0)
        {
            float shadMapDepth = texture2D(ShadowMap, uv + vec2(i * stepSize, j * stepSize)).x;

            // found a blocker
            if(shadMapDepth < receiver)
            {
                blockerSum += shadMapDepth;
                blockerCount += 1.0;
            }
        }
    }

    // With no blockers both sum and count are 0.0; dividing them would yield
    // 0/0 rather than the 0.0 that means "no blocker". Clamping the divisor
    // to 1.0 returns 0.0 in that case and leaves every other case unchanged.
    return blockerSum / max(blockerCount, 1.0);
}

/*
* Function to estimate the shadow penumbra size.
*/
/*
* Penumbra width from the parallel planes approximation:
*   (receiver - blocker) * lightSize / blocker
* where the receiver depth is Lpos.z. The uv argument is not used.
* No guard is applied for blocker == 0.0.
*/
float EstimatePenumbra(vec2 uv, vec4 Lpos, float blocker, float lightSize)
{
    // Depth gap between the receiver and the averaged blocker
    float depthGap = Lpos.z - blocker;

    return depthGap * lightSize / blocker;
}

/*
* Percentage Closer Filtering with customizable filter kernel size and sample amount.
*/
/*
* Percentage closer filter over a square kernel.
*
* uv          - kernel centre in shadow-map coordinates
* Lpos        - position in the light's clip space; Lpos.z is the depth under test
* filterWidth - half-width of the kernel
* numSamples  - samples taken along each axis
*
* Returns the fraction of samples whose stored depth is greater than Lpos.z
* (1.0 = every sample passes, 0.0 = none pass).
*/
float PCF(vec2 uv, vec4 Lpos, float filterWidth, float numSamples)
{
    float texelStep = 2.0 * filterWidth/numSamples;

    // Corner of the kernel; sampling proceeds from here in texelStep increments
    vec2 corner = uv - vec2(filterWidth, filterWidth);

    float litCount = 0.0;

    for(float row = 0.0; row < numSamples; row += 1.0)
    {
        for(float col = 0.0; col < numSamples; col += 1.0)
        {
            // Depth stored in the shadow map at this kernel tap
            vec2 tap = corner + vec2(row * texelStep, col * texelStep);
            float storedDepth = texture2D(ShadowMap, tap).x;

            // The tap counts as lit when the tested depth is in front of the stored depth
            if(Lpos.z < storedDepth)
                litCount += 1.0;
        }
    }

    // Average over all taps
    return litCount/(numSamples * numSamples);
}

Overmind
09-24-2005, 03:24 AM
The functions PCF and FindBlocker contain for loops with a non-constant iteration count. AFAIK the 9700 doesn't support dynamic branch statements, so it must unroll all loops, but that can only be done if the loop count is a compile time constant.

Arkion
09-24-2005, 07:58 AM
Thanks for the info, Overmind! This should be easily fixable by replacing the numSamples variables in the for statements with constant values (numSamples is known at compile time, after all).