Too tough for FX?

I was working on per-pixel lighting (PPL), and I think I ran into limits with 3 light sources, though as far as I remember the instruction count shouldn’t be the problem.

FX5200, FW 56.55
vp:

// Tangent-space basis vectors supplied as uniforms; main() derives the normal
// from their cross product.
uniform vec3 tangent;
uniform vec3 binormal;
// Eye and light positions. main() subtracts gl_Vertex from them directly, so
// they are presumably expressed in the same space as the incoming vertices
// — TODO confirm.
uniform vec3 eyepos;
uniform vec3 light1pos;
uniform vec3 light2pos;
uniform vec3 light3pos;
// Outputs to the fragment stage: the three light positions and the vertex
// position, copied through unchanged by main().
varying vec3 f_l1pos;
varying vec3 f_l2pos;
varying vec3 f_l3pos;
varying vec3 f_vpos;
// Outputs to the fragment stage: eye and light vectors after multiplication
// by the TBN matrix built in main().
// NOTE(review): these eight varyings plus gl_TexCoord[0] written in main()
// make nine interpolated outputs — verify against the target's varying limit.
varying vec3 TBNeye;
varying vec3 TBNlight1;
varying vec3 TBNlight2;
varying vec3 TBNlight3;

// Vertex shader entry point: builds a TBN matrix from the tangent/binormal
// uniforms, multiplies the vertex-to-eye and vertex-to-light vectors by it,
// and forwards the light positions, vertex position and texture coordinate
// to the fragment stage.
void main(void)
{
// Vectors from the vertex towards the eye and towards each of the three lights.
vec3 vtoe = eyepos-gl_Vertex;
vec3 vtol1 = light1pos-gl_Vertex;
vec3 vtol2 = light2pos-gl_Vertex;
vec3 vtol3 = light3pos-gl_Vertex;
// The normal is not passed in; it is computed from the two supplied basis vectors.
vec3 normal = cross(tangent, binormal);
mat3 TBN = mat3(tangent,binormal,normal);
// NOTE(review): mul() is a Cg-style intrinsic rather than a GLSL built-in;
// presumably accepted because the driver compiles through the Cg compiler — confirm.
TBNeye = mul(TBN, vtoe);
TBNlight1 = mul(TBN, vtol1);
TBNlight2 = mul(TBN, vtol2);
TBNlight3 = mul(TBN, vtol3);
// Pass the light positions and the vertex position through unchanged so the
// fragment shader can compute per-fragment distances.
f_l1pos = light1pos;
f_l2pos = light2pos;
f_l3pos = light3pos;
f_vpos = gl_Vertex.xyz;

// Clip-space position and the single texture coordinate set used by both samplers.
gl_Position = mul(gl_ModelViewProjectionMatrix, gl_Vertex);
gl_TexCoord[0] = gl_MultiTexCoord0;
}

debug output:
!!VP2.0

NV_vertex_program generated by NVIDIA Cg compiler

cgc version 1.2.0310 NDA Release, build date Jan 29 2004 17:16:19

command line args: -q -profile vp30 -entry main -oglsl

nv30vp backend compiling ‘main’ program

#vendor NVIDIA Corporation
#version 1.0.02
#profile vp30
#program main
#semantic tangent
#semantic binormal
#semantic eyepos
#semantic light1pos
#semantic light2pos
#semantic light3pos
#semantic gl_ModelViewProjectionMatrix
#var float3 tangent : : c[0] : -1 : 1
#var float3 binormal : : c[1] : -1 : 1
#var float3 eyepos : : c[2] : -1 : 1
#var float3 light1pos : : c[3] : -1 : 1
#var float3 light2pos : : c[4] : -1 : 1
#var float3 light3pos : : c[5] : -1 : 1
#var float4x4 gl_ModelViewProjectionMatrix : : c[6], 4 : -1 : 1
#var float3 f_l1pos : $vout.TEX1 : TEX1 : -1 : 1
#var float3 f_l2pos : $vout.TEX2 : TEX2 : -1 : 1
#var float3 f_l3pos : $vout. : : -1 : 0
#var float3 f_vpos : $vout.TEX3 : TEX3 : -1 : 1
#var float3 TBNeye : $vout.TEX4 : TEX4 : -1 : 1
#var float3 TBNlight1 : $vout.TEX5 : TEX5 : -1 : 1
#var float3 TBNlight2 : $vout.TEX6 : TEX6 : -1 : 1
#var float3 TBNlight3 : $vout.TEX7 : TEX7 : -1 : 1
#var float4 gl_Vertex : $vin.POSITION : ATTR0 : -1 : 1
#var float4 gl_MultiTexCoord0 : $vin.TEXCOORD0 : ATTR8 : -1 : 1
#var float4 gl_Position : $vout.POSITION : HPOS : -1 : 1
#var float4 gl_TexCoord[0] : $vout.TEX0 : TEX0 : -1 : 1
#var float4 gl_TexCoord[1] : $vout.TEX1 : : -1 : 0
#var float4 gl_TexCoord[2] : $vout.TEX2 : : -1 : 0
#var float4 gl_TexCoord[3] : $vout.TEX3 : : -1 : 0
#var float4 gl_TexCoord[4] : $vout.TEX4 : : -1 : 0
#var float4 gl_TexCoord[5] : $vout.TEX5 : : -1 : 0
#var float4 gl_TexCoord[6] : $vout.TEX6 : : -1 : 0
#var float4 gl_TexCoord[7] : $vout.TEX7 : : -1 : 0
#const c[10] = 0 1 2 0
b0:
MOV o[TEX1].xyz, c[3];
MOV o[TEX2].xyz, c[4];
MOV o[TEX3].xyz, v[0].xyzx;
MOV o[TEX0], v[8];
ADD R3.xyz, c[2].xyzx, -v[0].xyzx;
MOV R2.xyz, c[0];
DP3 o[TEX4].x, R2.xyzx, R3.xyzx;
DP3 o[TEX4].y, c[1].xyzx, R3.xyzx;
MUL R0.xyz, R2.zxyz, c[1].yzxy;
MAD R1.xyz, R2.yzxy, c[1].zxyz, -R0.xyzx;
DP3 o[TEX4].z, R1.xyzx, R3.xyzx;
ADD R0.xyz, c[3].xyzx, -v[0].xyzx;
DP3 o[TEX5].x, R2.xyzx, R0.xyzx;
DP3 o[TEX5].y, c[1].xyzx, R0.xyzx;
DP3 o[TEX5].z, R1.xyzx, R0.xyzx;
ADD R0.xyz, c[4].xyzx, -v[0].xyzx;
DP3 o[TEX6].x, R2.xyzx, R0.xyzx;
DP3 o[TEX6].y, c[1].xyzx, R0.xyzx;
DP3 o[TEX6].z, R1.xyzx, R0.xyzx;
ADD R0.xyz, c[5].xyzx, -v[0].xyzx;
DP3 o[TEX7].x, R2.xyzx, R0.xyzx;
DP3 o[TEX7].y, c[1].xyzx, R0.xyzx;
DP3 o[TEX7].z, R1.xyzx, R0.xyzx;
DP4 o[HPOS].x, c[6], v[0];
DP4 o[HPOS].y, c[7], v[0];
DP4 o[HPOS].z, c[8], v[0];
DP4 o[HPOS].w, c[9], v[0];
END

27 instructions

4 temp registers

27 instrs < 256

fp:

// Texture samplers: both are sampled with gl_TexCoord[0] in main(). The
// "normal" texel's rgb is passed through expand() before use.
uniform sampler2D diffuse;
uniform sampler2D normal;
// Interpolated light positions and vertex position; main() uses their
// differences to compute per-fragment light distances.
varying vec3 f_l1pos;
varying vec3 f_l2pos;
varying vec3 f_l3pos;
varying vec3 f_vpos;
// Interpolated eye and light vectors; main() normalizes them before use.
varying vec3 TBNeye;
varying vec3 TBNlight1;
varying vec3 TBNlight2;
varying vec3 TBNlight3;

// Returns (v - 0.5) * 2 per component, which maps 0 to -1 and 1 to +1.
// Used in main() on the rgb of the texel sampled from the "normal" texture.
vec3 expand(vec3 v)
{
return (v-0.5)*2;
}

// Fragment shader entry point: per-pixel diffuse + specular lighting from
// three point lights, using the vector sampled from the "normal" texture.
// Each light's contribution is scaled by a clamped inverse-square distance
// attenuation; the results are summed, clamped and written to gl_FragColor.
void main (void)
{
// Distance from the fragment to each light (scaled by 1/3), and the
// resulting attenuation 1/d^2 clamped to [0,1].
float d1 = length(f_l1pos-f_vpos)/3;
float attenuation1 = clamp(1/(d1*d1),0,1);
float d2 = length(f_l2pos-f_vpos)/3;
float attenuation2 = clamp(1/(d2*d2),0,1);
float d3 = length(f_l3pos-f_vpos)/3;
float attenuation3 = clamp(1/(d3*d3),0,1);
// Re-normalize the interpolated eye and light vectors.
// NOTE(review): this assigns to varyings inside the fragment shader, which
// stricter GLSL compilers may reject — confirm on the target driver.
TBNeye = normalize(TBNeye);
TBNlight1 = normalize(TBNlight1);
TBNlight2 = normalize(TBNlight2);
TBNlight3 = normalize(TBNlight3);
// Half-way vectors between the eye vector and each light vector.
vec3 hway1 = normalize(TBNeye+TBNlight1);
vec3 hway2 = normalize(TBNeye+TBNlight2);
vec3 hway3 = normalize(TBNeye+TBNlight3);
// Sample both textures with the same coordinate; remap the normal texel via expand().
vec4 dcol = texture2D(diffuse, gl_TexCoord[0]);
vec4 ncol = texture2D(normal, gl_TexCoord[0]);
ncol.rgb = expand(ncol.rgb);
// Specular terms: (half-way . normal) raised to the power 32.
// NOTE(review): lxh*/lxn* are declared vec3 but receive scalar results, and
// are later folded into float sums; kept as posted — strict GLSL may require float here.
vec3 lxh1 = pow(dot(hway1,ncol.rgb),32);
vec3 lxh2 = pow(dot(hway2,ncol.rgb),32);
vec3 lxh3 = pow(dot(hway3,ncol.rgb),32);
// Diffuse terms: (light . normal).
vec3 lxn1 = dot(TBNlight1,ncol.rgb);
vec3 lxn2 = dot(TBNlight2,ncol.rgb);
vec3 lxn3 = dot(TBNlight3,ncol.rgb);
// Attenuation-weighted sums over the three lights, clamped to [0,1].
float diffuse = clamp(lxn1*attenuation1+lxn2*attenuation2+lxn3*attenuation3,0,1);
float specular = clamp(lxh1*attenuation1+lxh2*attenuation2+lxh3*attenuation3,0,1);
// Diffuse modulates the texture colour; specular is scaled by the diffuse
// texture's w component.
gl_FragColor.rgb = clamp(diffuse*dcol.rgb+specular*dcol.w,0,1);
gl_FragColor.a = 1;
}

No debug output for fp, but compiler reports success. If I use 2 light sources with simplified prog, inst/reg usage stats are:

59 instructions, 4 R-regs, 0 H-regs

How do you pass normal, binormal, tangent, … to the vertex shader? I ran into some problems when I pass them via vec3 attributes and multiply them with the gl_NormalMatrix.

What exactly is the problem you are seeing? The GLSL code and the generated assembly you posted look fine.

I’m passing parameters as:
glUniform3fARB(glGetUniformLocationARB(Bump3pGLSL, “binormal”), x, y, z);
I’m also avoiding mul with gl_NormalMat… to save some computations & as data is passed in worldspace anyways.
The picture I get is -
solid: http://www.lapas.dau.lv/salitis/1.bmp
wireframe: http://www.lapas.dau.lv/salitis/2.bmp

I should see the middle sector of the wall rendered with 3 point lights applied; instead I get this (I have color buffer clears turned off in solid mode). The strange thing is that at some point you can see that bluish wall sector floating somewhere around, then disappearing somewhere. As far as I can see, it isn’t shaded, but simply multitextured (brick wall texture + normal map).
I tried leaving only:

gl_FragColor.rgb = vec3(0,1,1);
gl_FragColor.a = 1;

in fragment shader, but still the same effect.
But if I ALSO comment ANY of these lines in vsh, like:

TBNlight1 = mul(TBN, vtol1); or
TBNlight2 = mul(TBN, vtol2); or
TBNlight3 = mul(TBN, vtol3); or
f_l1pos = light1pos; or
f_l2pos = light2pos; or
f_l3pos = light3pos; or

I get the specified output col -> vec3(0,1,1); & also debug fragment shader output in file.
It seems like instr count limit to me, but there WAS 256 instrs in vp & 1024 instrs in fp for FX.
Of course FPS, just skyrockets & so on, but that’s FX5200 & shaders are for demonstrative purposes only

[This message has been edited by M/\dm/
(edited 02-28-2004).]

You use 9 varying variables, but there can be maximum 8. I think this is the source of your troubles.

Hmm, maybe, but compile returns success. V asm shows that it uses only TexCoord[7] at max.
Cubemap normalization isn’t going to solve my problem either
Can somebody give me advice on how to cut variable register usage? I doubt I can use color/fogcoord registers to bind data, am I wrong?
Of course I could resort to multipass rendering + ARB_occlusion_query for speed, but honestly I don’t want to do that. Or I can get away by passing lightpos as uniform to fp too?

BTW, is it worth to jump to norm cube map? I think it should be faster for FX while res is smaller 512x512, what will happen on Radeon? And I don’t want to stick with just 256x256

[This message has been edited by M/\dm/
(edited 02-29-2004).]

Originally posted by M/\dm/
:
Or I can get away by passing lightpos as uniform to fp too?

I think gl_LightSource is available in fragment shaders too

The solution is to use uniform variables. FPS didn’t suffer a lot after adding 1 more light source, & it’s ok to use the same uniform in vp & fp.
Thanks for the help!

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.