nickels

06-16-2010, 08:20 PM

Hi,

I have a shader that loops some number of lights (determined before execution) and samples corresponding shadow map arrays.

I am using an array of samplers for the shadow maps since I don't know in advance if each light will want the same size texture for its set of shadow maps.

The whole shader is here (section in green is the relevant section):

#version 130

#pragma optionNV unroll all

#extension GL_EXT_texture_array : enable

const int MAX_SHADOW_TEX = 6;

const int MAX_ACTIVE_SHADOW = 3;

uniform int numShadowTex[MAX_ACTIVE_SHADOW];

uniform mat4 LightVP[MAX_ACTIVE_SHADOW*MAX_SHADOW_TEX];

uniform sampler2DArrayShadow STexSampler[MAX_ACTIVE_SHADOW];

//uniform sampler2DArrayShadow STexSampler;

uniform sampler2D texSampler;

uniform sampler2D normalSampler;

uniform int numLights;

uniform sampler1D lightSampler;

// How many entries per light?

uniform int lightStride;

// Describe the light structure

const int LIGHTDIFF = 1;

const int LIGHTDIR = 2;

const int LIGHTSPEC = 3;

uniform int control;

uniform vec3 eyePos;

in vec3 NormalW;

in vec3 TangentW;

in vec3 PosW;

in vec3 PosV;

uniform float ZCuts[MAX_ACTIVE_SHADOW*(MAX_SHADOW_TEX+1)];

float getShadowCoeff(in int l, in int zcut_idx, in int shad_idx) {

//if ( l != 0) return 0.0f;

vec4 smapCoord;

int idx = 0;

float shadowcoeff = 1.0;

// Decide which map to look up into

int s = 1;

bool done = false;

while (!done && s <= numShadowTex[l]) {

if (-PosV.z <= ZCuts[zcut_idx+s])

done = true;

else

s++;

}

idx = s - 1;

smapCoord = LightVP[shad_idx + idx]*vec4(PosW, 1.0);

smapCoord = 0.5*(smapCoord+1.0);

smapCoord.w = smapCoord.z;

smapCoord.z = float(idx);

int mi = idx;

idx += shad_idx;

// Temp to activate shadow map

// CASE 1

vec4 samp;

samp = shadow2DArray( STexSampler[l], smapCoord);

/*

//CASE 2:

switch(l) {

case 0:

samp = shadow2DArray( STexSampler[0], smapCoord);

break;

case 1:

samp = shadow2DArray( STexSampler[1], smapCoord);

break;

case 2:

samp = shadow2DArray( STexSampler[2], smapCoord);

break;

}

*/

/*

//CASE 3:

if (0 == l) {

samp = shadow2DArray( STexSampler[0], smapCoord);

} else if ( 1 == l) {

samp = shadow2DArray( STexSampler[1], smapCoord);

} else if ( 2 == l) {

samp = shadow2DArray( STexSampler[2], smapCoord);

}

*/

/*

return float(l);

samp = shadow2DArray( STexSampler[0], smapCoord);

*/

shadowcoeff = samp.r;

// Gaussian 3x3 filter

/*

if (smapCoord.x < 0.0 || smapCoord.x > 1.0 || smapCoord.y < 0.0 ||

smapCoord.y > 1.0) {

shadowcoeff = 10.0f;

}

*/

if (smapCoord.x < 0.0 || smapCoord.x > 1.0) {

shadowcoeff = 10.0;

} else if (smapCoord.y < 0.0 || smapCoord.y > 1.0) {

shadowcoeff = 0.0;

}

return shadowcoeff;

}

void main(void) {

vec4 diff_lookup = texture2D(texSampler, gl_TexCoord[0].st);

//vec4 lightdiff = texture1D(lightSampler, 0, 0);

float emit = diff_lookup.a*255.0;

vec4 emissive = vec4(0.0);

// Peel off the emissive light

if (emit > 1.0) {

emissive = diff_lookup*emit;

//diffuse = vec4(0.0);

diff_lookup = vec4(0.0);

}

// Logic to choose the normal, either based on the geometry

// or the normal map.

vec3 NormalF;

if (1 == (control & 0x01)) {

vec3 normalT = vec3(texture2D(normalSampler, gl_TexCoord[0].st));

// uncompress from [0,1] to [-1,1]

normalT = 2.0*normalT - 1.0;

vec3 N = normalize(NormalW);

vec3 T = normalize(TangentW - dot(TangentW, N)*N);

vec3 B = cross(N, T);

mat3 TBN = mat3(T, B, N);

NormalF = normalize(TBN*normalT);

} else {

NormalF = normalize(NormalW);

}

vec4 col = vec4(0.0);

vec4 ambient = vec4(0.0);

int zcut_idx = 0;

int shad_idx = 0;

for (int l = 0; l < numLights; ++l) {

int lb = lightStride*l;

vec3 lightDir = -normalize(vec3(texelFetch(lightSampler, lb + LIGHTDIR, 0)));

vec4 lightdiff = texelFetch(lightSampler, lb + LIGHTDIFF, 0);

vec4 spec = texelFetch(lightSampler, lb + LIGHTSPEC, 0);

vec4 diffuse = diff_lookup*lightdiff;

// Diffuse light calculation

float lightS = dot(NormalF, lightDir);

vec4 specular = vec4(0.0);

ambient += 0.1*diffuse;

if (lightS > 0.0) {

diffuse *= lightS;

// specular light

float specPower = max(spec.a, 1.0);

vec3 toEye = normalize(eyePos - PosW);

vec3 reflectVec = reflect(-lightDir, NormalF);

float specFactor = pow(max(dot(reflectVec, toEye), 0.0), specPower);

float specmap = 1.0; // should look this up in specmap

specular = specFactor*specmap*spec*diff_lookup;

} else {

diffuse = vec4(0.0);

}

float shadowcoeff = 1.0;

shadowcoeff = getShadowCoeff(l, zcut_idx, shad_idx);

// Shadowcoeff calc

col += emissive + shadowcoeff*(diffuse + specular);

zcut_idx += (numShadowTex[l] + 1);

shad_idx += numShadowTex[l];

} // numLights

col += ambient/float(numLights);

gl_FragColor = vec4(col.x, col.y, col.z, 1.0);

}

If I enable CASE 1: I get an error like:

0(80) : error C5208: Sampler needs to be a uniform (global or parameter to main), need to inline function or resolve conditional expression

(0) : fatal error C9999: unable to generate code for texture function.

Is this right?

So I try to unroll the loop, which actually runs (either CASE 2 or 3). However, if the number of lights is less than the number unrolled, I get a shadowmap factor that doesn't make any sense, because, apparently, the sampler case for the last lights are being sampled and corrupting the earlier values (which I verify by commenting out the unused cases and everything works fine).

Which leads me to believe that in the unrolled case I may be getting a performance hit, since, likely, all three samples are being called for each value of l!!

Any suggestions what to do? My only thought at this point is to autogenerate the shader for each case, unrolling the loops explicity. But then I worry about code bloat??

My system info:

GeForce GTX 260, windows 7, driver=197.45

Mem: 1792 MB

Thanks!

I have a shader that loops some number of lights (determined before execution) and samples corresponding shadow map arrays.

I am using an array of samplers for the shadow maps since I don't know in advance if each light will want the same size texture for its set of shadow maps.

The whole shader is here (section in green is the relevant section):

#version 130

#pragma optionNV unroll all

#extension GL_EXT_texture_array : enable

const int MAX_SHADOW_TEX = 6;

const int MAX_ACTIVE_SHADOW = 3;

uniform int numShadowTex[MAX_ACTIVE_SHADOW];

uniform mat4 LightVP[MAX_ACTIVE_SHADOW*MAX_SHADOW_TEX];

uniform sampler2DArrayShadow STexSampler[MAX_ACTIVE_SHADOW];

//uniform sampler2DArrayShadow STexSampler;

uniform sampler2D texSampler;

uniform sampler2D normalSampler;

uniform int numLights;

uniform sampler1D lightSampler;

// How many entries per light?

uniform int lightStride;

// Describe the light structure

const int LIGHTDIFF = 1;

const int LIGHTDIR = 2;

const int LIGHTSPEC = 3;

uniform int control;

uniform vec3 eyePos;

in vec3 NormalW;

in vec3 TangentW;

in vec3 PosW;

in vec3 PosV;

uniform float ZCuts[MAX_ACTIVE_SHADOW*(MAX_SHADOW_TEX+1)];

float getShadowCoeff(in int l, in int zcut_idx, in int shad_idx) {

//if ( l != 0) return 0.0f;

vec4 smapCoord;

int idx = 0;

float shadowcoeff = 1.0;

// Decide which map to look up into

int s = 1;

bool done = false;

while (!done && s <= numShadowTex[l]) {

if (-PosV.z <= ZCuts[zcut_idx+s])

done = true;

else

s++;

}

idx = s - 1;

smapCoord = LightVP[shad_idx + idx]*vec4(PosW, 1.0);

smapCoord = 0.5*(smapCoord+1.0);

smapCoord.w = smapCoord.z;

smapCoord.z = float(idx);

int mi = idx;

idx += shad_idx;

// Temp to activate shadow map

// CASE 1

vec4 samp;

samp = shadow2DArray( STexSampler[l], smapCoord);

/*

//CASE 2:

switch(l) {

case 0:

samp = shadow2DArray( STexSampler[0], smapCoord);

break;

case 1:

samp = shadow2DArray( STexSampler[1], smapCoord);

break;

case 2:

samp = shadow2DArray( STexSampler[2], smapCoord);

break;

}

*/

/*

//CASE 3:

if (0 == l) {

samp = shadow2DArray( STexSampler[0], smapCoord);

} else if ( 1 == l) {

samp = shadow2DArray( STexSampler[1], smapCoord);

} else if ( 2 == l) {

samp = shadow2DArray( STexSampler[2], smapCoord);

}

*/

/*

return float(l);

samp = shadow2DArray( STexSampler[0], smapCoord);

*/

shadowcoeff = samp.r;

// Gaussian 3x3 filter

/*

if (smapCoord.x < 0.0 || smapCoord.x > 1.0 || smapCoord.y < 0.0 ||

smapCoord.y > 1.0) {

shadowcoeff = 10.0f;

}

*/

if (smapCoord.x < 0.0 || smapCoord.x > 1.0) {

shadowcoeff = 10.0;

} else if (smapCoord.y < 0.0 || smapCoord.y > 1.0) {

shadowcoeff = 0.0;

}

return shadowcoeff;

}

void main(void) {

vec4 diff_lookup = texture2D(texSampler, gl_TexCoord[0].st);

//vec4 lightdiff = texture1D(lightSampler, 0, 0);

float emit = diff_lookup.a*255.0;

vec4 emissive = vec4(0.0);

// Peel off the emissive light

if (emit > 1.0) {

emissive = diff_lookup*emit;

//diffuse = vec4(0.0);

diff_lookup = vec4(0.0);

}

// Logic to choose the normal, either based on the geometry

// or the normal map.

vec3 NormalF;

if (1 == (control & 0x01)) {

vec3 normalT = vec3(texture2D(normalSampler, gl_TexCoord[0].st));

// uncompress from [0,1] to [-1,1]

normalT = 2.0*normalT - 1.0;

vec3 N = normalize(NormalW);

vec3 T = normalize(TangentW - dot(TangentW, N)*N);

vec3 B = cross(N, T);

mat3 TBN = mat3(T, B, N);

NormalF = normalize(TBN*normalT);

} else {

NormalF = normalize(NormalW);

}

vec4 col = vec4(0.0);

vec4 ambient = vec4(0.0);

int zcut_idx = 0;

int shad_idx = 0;

for (int l = 0; l < numLights; ++l) {

int lb = lightStride*l;

vec3 lightDir = -normalize(vec3(texelFetch(lightSampler, lb + LIGHTDIR, 0)));

vec4 lightdiff = texelFetch(lightSampler, lb + LIGHTDIFF, 0);

vec4 spec = texelFetch(lightSampler, lb + LIGHTSPEC, 0);

vec4 diffuse = diff_lookup*lightdiff;

// Diffuse light calculation

float lightS = dot(NormalF, lightDir);

vec4 specular = vec4(0.0);

ambient += 0.1*diffuse;

if (lightS > 0.0) {

diffuse *= lightS;

// specular light

float specPower = max(spec.a, 1.0);

vec3 toEye = normalize(eyePos - PosW);

vec3 reflectVec = reflect(-lightDir, NormalF);

float specFactor = pow(max(dot(reflectVec, toEye), 0.0), specPower);

float specmap = 1.0; // should look this up in specmap

specular = specFactor*specmap*spec*diff_lookup;

} else {

diffuse = vec4(0.0);

}

float shadowcoeff = 1.0;

shadowcoeff = getShadowCoeff(l, zcut_idx, shad_idx);

// Shadowcoeff calc

col += emissive + shadowcoeff*(diffuse + specular);

zcut_idx += (numShadowTex[l] + 1);

shad_idx += numShadowTex[l];

} // numLights

col += ambient/float(numLights);

gl_FragColor = vec4(col.x, col.y, col.z, 1.0);

}

If I enable CASE 1: I get an error like:

0(80) : error C5208: Sampler needs to be a uniform (global or parameter to main), need to inline function or resolve conditional expression

(0) : fatal error C9999: unable to generate code for texture function.

Is this right?

So I try to unroll the loop, which actually runs (either CASE 2 or 3). However, if the number of lights is less than the number unrolled, I get a shadowmap factor that doesn't make any sense, because, apparently, the sampler case for the last lights are being sampled and corrupting the earlier values (which I verify by commenting out the unused cases and everything works fine).

Which leads me to believe that in the unrolled case I may be getting a performance hit, since, likely, all three samples are being called for each value of l!!

Any suggestions what to do? My only thought at this point is to autogenerate the shader for each case, unrolling the loops explicity. But then I worry about code bloat??

My system info:

GeForce GTX 260, windows 7, driver=197.45

Mem: 1792 MB

Thanks!