PDA

View Full Version : AMD's GLSL Compiler bug



YarUnderoaker
11-08-2010, 04:56 AM
There is my shader fragment program

#version 330 core
precision highp float;

struct frec {
vec4 WorldPosition;
vec3 LightEmissive;
vec3 LightDiffuse;
vec3 LightSpecular;
vec3 Emissive;
vec3 Diffuse;
float DiffusePower;
vec3 Specular;
float SpecularPower;
float Opacity;
float OpacityMask;
vec3 Normal;
vec3 CameraVector;
};

struct lrec {
vec4 WorldPosition;
vec3 Ambient;
vec3 Diffuse;
vec3 Specular;
vec3 SpotDirection;
float SpotCutoff;
float SpotExponent;
float ConstantAtten;
float LinearAtten;
float QuadAtten;
};

in vec4 fWorldPosition;
in vec3 fCameraVector;
out vec4 FragColor;

layout(std140) uniform LightsBlock {
uniform lrec Lights[16];
};

uniform ivec4 LightIndices[16];
uniform int LightNumber;

void GetFragment(inout frec A);
void Illuminate(inout frec A);
void PassFragmentColor(inout frec A);

void main()
{
frec Local0;
Local0.Opacity = 1.0;
GetFragment(Local0);
Illuminate(Local0);
PassFragmentColor(Local0);
}

void GetFragment(inout frec A)
{
A.WorldPosition = fWorldPosition;
A.CameraVector = fCameraVector;
}

void pointLight(inout frec A, lrec B)
{
float nDotVP; // normal . light direction
float nDotHV; // normal . light half vector
float attenuation; // computed attenuation factor
float d; // distance from surface to light source
vec3 VP; // direction from surface to light position
vec3 halfVector; // direction of maximum highlights

// Compute vector from surface to light position
VP = B.WorldPosition.xyz - A.WorldPosition.xyz;

// Compute distance between surface and light position
d = length(VP);

// Normalize the vector from surface to light position
VP = normalize(VP);

// Compute attenuation
attenuation = 1.0 / (B.ConstantAtten + B.LinearAtten * d + B.QuadAtten * d * d);

halfVector = normalize(VP + A.CameraVector);

nDotVP = pow(max(0.0, dot(A.Normal, VP)), A.DiffusePower);
nDotHV = pow(max(0.0, dot(A.Normal, halfVector)), A.SpecularPower);

A.LightEmissive += B.Ambient * attenuation;
A.LightDiffuse += B.Diffuse * nDotVP * attenuation;
A.LightSpecular += B.Specular * nDotHV * attenuation;
}

void spotLight(inout frec A, lrec B)
{
float nDotVP; // normal . light direction
float nDotHV; // normal . light half vector
float spotDot; // cosine of angle between spotlight
float spotAttenuation; // spotlight attenuation factor
float attenuation; // computed attenuation factor
float d; // distance from surface to light source
vec3 VP; // direction from surface to light position
vec3 halfVector; // direction of maximum highlights

// Compute vector from surface to light position
VP = B.WorldPosition.xyz - A.WorldPosition.xyz;

// Compute distance between surface and light position
d = length(VP);

// Normalize the vector from surface to light position
VP = normalize(VP);

// Compute attenuation
attenuation = 1.0 / (B.ConstantAtten + B.LinearAtten * d + B.QuadAtten * d * d);

// See if point on surface is inside cone of illumination
spotDot = dot(-VP, normalize(B.SpotDirection));

if (spotDot < cos(0.01745329251994*B.SpotCutoff))
{
spotAttenuation = 0.0; // light adds no contribution
}
else
{
spotAttenuation = pow(spotDot, B.SpotExponent);

}
// Combine the spotlight and distance attenuation.
attenuation *= spotAttenuation;

halfVector = normalize(VP + A.CameraVector);

nDotVP = pow(max(0.0, dot(A.Normal, VP)), A.DiffusePower);
nDotHV = pow(max(0.0, dot(A.Normal, halfVector)), A.SpecularPower);

A.LightEmissive += B.Ambient * attenuation;
A.LightDiffuse += B.Diffuse * nDotVP * attenuation;
A.LightSpecular += B.Specular * nDotHV * attenuation;
}

void directionalLight(inout frec A, lrec B)
{
float nDotVP; // normal . light direction
float nDotHV; // normal . light half vector
vec3 VP; // direction from surface to light position
vec3 halfVector; // direction of maximum highlights

// Compute vector from surface to light position
VP = normalize(B.WorldPosition.xyz - A.WorldPosition.xyz);

halfVector = normalize(B.SpotDirection + A.CameraVector);

nDotVP = pow(max(0.0, dot(A.Normal, VP)), A.DiffusePower);
nDotHV = pow(max(0.0, dot(A.Normal, halfVector)), A.SpecularPower);

A.LightEmissive += B.Ambient;
A.LightDiffuse += B.Diffuse * nDotVP;
A.LightSpecular += B.Specular * nDotHV;
}

void infiniteSpotLight(inout frec A, lrec B)
{
float nDotVP; // normal . light direction
float nDotHV; // normal . light half vector
vec3 VP; // direction from surface to light position
vec3 halfVector; // direction of maximum highlights
float spotAttenuation;
vec3 Ppli;
vec3 Sdli;

// Compute vector from surface to light position
VP = normalize(B.WorldPosition.xyz - A.WorldPosition.xyz);

halfVector = normalize(B.SpotDirection + A.CameraVector);

nDotVP = pow(max(0.0, dot(A.Normal, VP)), A.DiffusePower);
nDotHV = pow(max(0.0, dot(A.Normal, halfVector)), A.SpecularPower);

Ppli = -VP;
Sdli = B.SpotDirection;

spotAttenuation = pow(dot(Ppli, Sdli), B.SpotExponent);

A.LightEmissive += B.Ambient * spotAttenuation;
A.LightDiffuse += B.Diffuse * nDotVP * spotAttenuation;
A.LightSpecular += B.Specular * nDotHV * spotAttenuation;
}
void Illuminate(inout frec A)
{
A.LightEmissive = vec3(0.0,0.0,0.0);
A.LightDiffuse = vec3(0.0,0.0,0.0);
A.LightSpecular = vec3(0.0,0.0,0.0);

for (int I = 0; I<LightNumber &amp;&amp; I<16; I++)
{
int J = LightIndices[I].x;
lrec LightSource = Lights[J];
if (LightSource.WorldPosition.w == 1.0)
{
if (LightSource.SpotCutoff == 180.0)
{
pointLight(A,LightSource);
}
else
{
spotLight(A,LightSource);
}
}
else
{
if (LightSource.SpotCutoff == 180.0)
{
directionalLight(A,LightSource);
}
else
{
infiniteSpotLight(A,LightSource);
}
}
}
vec3 finalColor;
finalColor = A.Emissive*clamp(A.LightEmissive,vec3(0.0),vec3(1. 0));
finalColor += A.Diffuse*clamp(A.LightDiffuse,vec3(0.0),vec3(1.0) );
finalColor += A.Specular*clamp(A.LightSpecular,vec3(0.0),vec3(1. 0));
A.Emissive = finalColor;
}

void PassFragmentColor(inout frec A)
{
FragColor = vec4(A.Emissive, A.Opacity);
}

The fragment object compile successfully, but program linking fail with error

Fragment shader(s) failed to link, vertex shader(s) failed to link.
unexpected error.
unexpected error.

I trying to test it by GPU Shader Analyze
in version 1.53 all fine,
but in version 1.55 fail with error


Internal compilation failure. Possibly caused by GSA not supporting a GLSL feature used in shader.
ERROR: 0:7: error(#132) Syntax error: ';' parse error
ERROR: error(#273) 1 compilation errors. No code generated

Also I trying to simplify the shader, to leave only a one pointLight for first light source, to replace the uniform block to one structured uniform - does not help.

My system
Catalist 10.10 or 10.9
Radeon HD5450
WinXP SP2

YarUnderoaker
11-08-2010, 05:42 AM
I find workaround - exchange varyings and structure declaration.
I can not understand why this happens.
In GLSL specification do not specify a strong order.

frank li
11-08-2010, 07:03 PM
The shader works for me with Cat10.10 driver. I'm using HD5700, but I don't think there will be much difference.

YarUnderoaker
11-09-2010, 12:21 AM
Did you install 10.10c hotfix. I'm not because XP. Perhaps this is reason.

frank li
11-09-2010, 09:50 PM
I can't find the 10.10c hotfix for winxp. I retry the shader on winxp+Cat10.10+HD5450. It works.

If it's not resolved on your side, you could try to narrow down your project and send it to me by frank.li@amd.com.

YarUnderoaker
11-10-2010, 04:18 AM
Ok. I sent project to test.

YarUnderoaker
11-10-2010, 02:26 PM
People, don't look at shader, it vector algebra is wrong :o

YarUnderoaker
11-11-2010, 01:09 PM
Tested on
Radeon HD 4800, WinXP sp3, Catalist 10.9

same error (GL debug callback):
glLinkProgram failed to link a GLSL program with the following program info log: 'Fragment shader(s) failed to link, vertex shader(s) failed to link.
unexpected error.
unexpected error.

frank li
11-11-2010, 11:05 PM
It's a known issue and is fixed recently. Thanks for providing the demo to reproduce it.

Chris Lux
11-22-2010, 07:05 AM
hi frank,
can we have access to a beta driver with the fix included?

i am working on a tessellation shader on an AMD platform and my shaders compile without errors or warnings, but when linking them i get the same error message:


Fragment shader(s) failed to link, vertex shader(s) failed to link.
unexpected error.
unexpected error.

i tried swapping some declarations around in all shaders, but i can not get rid of the error.

another problem i found is the following when using gl_InvocationID to index the control vertex in the tessellation control shader:


#version 400 core

layout(vertices = 4) out;

// tessellation control shader
in per_vertex {
vec2 texcoord;
} vertex_in[];

out per_vertex {
vec2 texcoord;
} vertex_out[];

...

// not working
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
// working
gl_out[0].gl_Position = gl_in[0].gl_Position;
gl_out[1].gl_Position = gl_in[1].gl_Position;
gl_out[2].gl_Position = gl_in[2].gl_Position;
gl_out[3].gl_Position = gl_in[3].gl_Position;
// not working
vertex_out[gl_InvocationID].texcoord = vertex_in[gl_InvocationID].texcoord;
// working
vertex_out[0].texcoord = vertex_in[0].texcoord;
vertex_out[1].texcoord = vertex_in[1].texcoord;
vertex_out[2].texcoord = vertex_in[2].texcoord;
vertex_out[3].texcoord = vertex_in[3].texcoord;


I am using a OpenGL 4.0 core profile context on Windows 7 x64.

frank li
11-22-2010, 07:51 PM
The gl_InvocationID is also fixed recently.

Usually I'm not clear when the fix will be included in the beta driver. If you want to get access to a beta driver on windows, you have to get in touch with the workstation ISV engineering team.

david_f_knight
11-22-2010, 09:36 PM
Here's an idea: have you tried declaring vertex_in[] and vertex_out[] with constants (presumably 4) rather than leaving their size implicit/undefined? Give that a try and see what happens.

YarUnderoaker
11-23-2010, 01:47 AM
Yesterday I installed Catalist 10.11 and find other problem.
There is shader object to fetch light source properties

#version 400 core
precision highp float;

struct lrec
{
vec4 WorldPosition;
vec3 Ambient;
vec3 Diffuse;
vec3 Specular;
vec3 SpotDirection;
float SpotCutoff;
float SpotExponent;
float ConstantAtten;
float LinearAtten;
float QuadAtten;
float SpotCosCutoff;
};

layout(std140) uniform LightsBlock { lrec Lights[16]; };
uniform int LightIndices[16];

lrec GetLight(int A)

{
int J = LightIndices[A];
return Lights[J];
}
It compile fine.

But when program linking - error will happen
error(#278) Uniform block layout qualifiers can only be applied to the keyword uniform(layout qualifier should not apply to a specific declaration)

I try to remove layout(std140) qualifier - same error.

PS: a previous shader from first post now compile but data gets rightly only for first array element, in another is trash.
In general, both shaders are the same, but the functions are placed on different objects, however, the compiler interprets them differently. I'm stuck with it and can not continue to develop :(

aqnuep
11-23-2010, 07:23 AM
I think your problem is that you incorrectly declared your uniform block. Try this way:


layout(std140) uniform lightsblock {
lrec Lights[16];
} LightsBlock;

Notice that the block variable name is at the end.

YarUnderoaker
11-23-2010, 07:38 AM
Yes, I tried that too, but same error.
According to spec instance-name is optional and
if is not used, the names declared inside the block are scoped at the global level and accessed as if they were declared outside the block.

aqnuep
11-23-2010, 10:36 AM
Ahh, didn't know that btw. I always used explicit instance name.
However, I wonder how's that possible that the same error is raised also when you removed the layout(std140) qualifier. It should not complain about layout qualifiers if no such thing is used in the shader.
Are you sure that the link error comes from this shader? Maybe another shader part of the program is the guilty one.

frank li
11-23-2010, 09:32 PM
I can't reproduce your problem with Cat10.11. Could you please paste the whole shader?

YarUnderoaker
11-24-2010, 06:42 AM
Are you sure that the link error comes from this shader? Maybe another shader part of the program is the guilty one.
Yes, I'm sure.

There is test project http://glscene.svn.sourceforge.net/viewvc/glscene/branches/TestForAMD/3xMaterialT4.rar?revision=5325
It is same as I send Frank Li previously, but has minimal different in shader objects. Please try it.

aqnuep
11-24-2010, 10:08 AM
It fails at my side also with Cat10.10e.
However, it is pretty difficult to figure out what is what in the log file.
Maybe you should write a much simpler app to be sure that the error does not come from incorrect shader source code loading, or whatever.

Off-topic: You are the developer of GLScene? I was really amazed when I tried it a few years ago. Since that, I abandoned Delphi and moved to C++ as unfortunately Delphi is not a well accepted programming language for games and graphics, besides, it lacks of good libraries, but I also like the language better than C++.

YarUnderoaker
11-24-2010, 12:33 PM
Okey, I'll try to write more simple program. But log is simple too, in ShaderCompilation.log message marked with (EE) is error.
MatTest4.log contain message from callback of GL_AMD_debug_output
like

glLinkProgram failed to link a GLSL program with the following program info log: 'Fragment shader(s) failed to link, vertex shader(s) failed to link.
unexpected error.
unexpected error.
PS: try it run on NVidia. I know NV is has more softer demand, but work fine as need for me :)

Off-topic:
Yep, I one of current GLScene's developer.
I try to lay foundations for future, when new hardware is more common, and drivers are more stable to work with the present version of OpenGL :)
Currently working on node based material editor (Shader graph) build-in IDE
http://img12.imageshost.ru/img/2010/11/06/image_4cd4834b1682c_small.png (http://imageshost.ru/photo/1536854/id226103.html)
I've already learned Delphi, and have no desire to switch to another language just because they are more popular or have more libraries, whereas there are many other tasks, rather than spending time on mastering other languages. And Pascal for me is more human-friendly language :) especially as it progresses, both from the Embarcadero, and from the FreePascal

frank li
11-25-2010, 12:50 AM
I verified that it's a driver bug.
A possible workaround is that swapping the order of the shaders that are using "struct lrec".

YarUnderoaker
11-25-2010, 04:21 AM
Okey, thanks.

YarUnderoaker
12-03-2010, 04:33 AM
Workaround is work. But only data of first light properly read.
So I must wait for Catalist 10.12 with new year gifts ;)

YarUnderoaker
12-06-2010, 03:10 AM
On the agenda the question - an array of structures is a bug or a feature?
The GLSL Specification does not clearly explain this.
Please, anyone can answer whether it is worth doing well or not? If not - then what alternative, simple array of structure without unifrom block or block per light or manual unrolling without array but static branching?

Dark Photon
12-06-2010, 07:06 AM
On the agenda the question - an array of structures is a bug or a feature? ... The GLSL Specification does not clearly explain this.

Definitely a feature, not a bug. The GLSL 4.0 spec says in the "Arrays" section "All basic types and structures can be formed into arrays.".

And even better info on uniform arrays of structs can be found in the OpenGL specification:

If you look at the glGetActiveUniform (http://www.opengl.org/sdk/docs/man4/xhtml/glGetActiveUniform.xml) man page, it explicitly calls out uniform arrays of structures, marking it valid, and telling you how they're going to be handled (see "Uniform variables that are declared as structures or arrays of structures..."), and this is mirrored by language directly in the OpenGL spec.

So "uniform arrays of structs" are perfectly valid, and I wouldn't just give up on this approach. And I agree from your example code that this form lays very well, compared to the alternatives I could think of.

I have tried "ordinary arrays of structures" (on NVidia with ordinary shader data) and it works well.

What I haven't tried (or spent time getting working anyway) is what you're doing: "uniform" arrays of structures. Have only tried passing in uniform arrays of scalar/vector types, and that works well.

I can also tell you that your code in this post (http://www.opengl.org/discussion_boards/ubbthreads.php?ubb=showflat&amp;Number=286507#Post2865 07) compiles fine on the NVidia compiler.


If not - then what alternative, simple array of structure without unifrom block or block per light or manual unrolling without array but static branching?

Well, looks like what you have should work. I'll plug this it up to code at some point and give it a whirl. I've you've already got a short GLUT test prog driver, post, and I'll try here on NVidia.

If all else fails, I can tell you that the following kludgy work-around (compared to your clean solution) "does" work. That is, instead of array of structures, use multiple arrays of primitive scalar/vector types, one for each "struct" member:



const int NUM_LIGHTS = 16;

uniform vec4 LightWorldPosition[ NUM_LIGHTS ];
uniform vec4 LightAmbient [ NUM_LIGHTS ];
uniform vec3 LightDiffuse [ NUM_LIGHTS ];
...

And definitely works efficiently (on NVidia), without forcing manual loop unrolling.

(In the future, GLSL posts like this would probably be better placed in the GLSL forum, until the issue is established to be a vendor-specific bug/quirk.

Dark Photon
12-06-2010, 07:29 AM
One thing that is a little odd about uniform arrays of structs...

With uniform arrays of ordinary scalar/vector data, glGetActiveUniform says it just returns one entry per uniform array. So you can populate a whole array with a single uniform call, which is pretty efficient.

But with uniform arrays of "structs" it says:


Uniform variables that are declared as structures or arrays of structures will not be returned directly by this function. Instead, each of these uniform variables will be reduced to its fundamental components containing the "." and "[]" operators such that each of the names is valid as an argument to glGetUniformLocation. Each of these reduced uniform variables is counted as one active uniform variable and is assigned an index. A valid name cannot be a structure, an array of structures, or a subcomponent of a vector or matrix.

which suggests you can't set all the light[].diffuse values with a single uniform call, you end up having individual sets for:

* light[0].diffuse
* light[1].diffuse
* light[2].diffuse
...
* light[0].specular
* light[1].specular
* light[2].specular
...
(N calls for M elements = N*M sets to populate!!!)

I hope I'm reading this wrong because that seems pretty horrible. Anybody read this differently? Or is there another API I haven't played with which is better tuned to discovering and populating uniform arrays of structures?

kRogue
12-06-2010, 08:00 AM
I hope I'm reading this wrong because that seems pretty horrible. Anybody read this differently? Or is there another API I haven't played with which is better tuned to discovering and populating uniform arrays of structures?


I think you are reading it perfectly correctly, separate arrays are better than arrays of structs for uniforms to set bunches at the same time..... to set an array of structs, the only thing that I can think of is uniform buffer objects (which is not the same thing).. oh wait..that was how this thread started :D

Dark Photon
12-06-2010, 06:59 PM
I think you are reading it perfectly correctly, separate arrays are better than arrays of structs for uniforms to set bunches at the same time..... to set an array of structs, the only thing that I can think of is uniform buffer objects (which is not the same thing).. oh wait..that was how this thread started :D
Ok. Yeah, definitely a major disadvantage there. In a perfect world, you'd like to get light[].diffuse as a uniform array you can set at once. And so on for light[].specular, etc.

So on the multiple uniform arrays of primitive types vs. UBOs front, I think the main factor there is possibly speed, with normal uniform arrays allegedly leading, according to Ilian Dinev (random thread link (http://www.opengl.org/discussion_boards/ubbthreads.php?ubb=showflat&amp;Number=277222#Post2772 22)), at least on NVidia.

frank li
12-06-2010, 07:47 PM
An array of structure is certainly a feature and AMD supports it.
Some difference is that AMD driver has limitation on the uniform block array size. For example,
uniform block0
{
...
} B[size];
If the "size" is greater than we expected, the compiler will throw out an error.

YarUnderoaker
12-07-2010, 03:09 AM
New I try variant with multiple scalar/vector array in block


struct lrec {
vec4 WorldPosition;
vec3 Ambient;
vec3 Diffuse;
vec3 Specular;
vec3 SpotDirection;
float SpotExponent;
float ConstantAtten;
float LinearAtten;
float QuadAtten;
float SpotCosCutoff;
};

layout(std140) uniform LightsBlock {
vec4 WorldPosition[8];
vec4 Ambient[8];
vec4 Diffuse[8];
vec4 Specular[8];
vec4 SpotDirection[8];
float SpotExponent[8];
float ConstantAtten[8];
float LinearAtten[8];
float QuadAtten[8];
float SpotCosCutoff[8];
};

uniform int LightIndices[8];

lrec GetLight(int A)
{
A = LightIndices[A];
lrec B;
B.WorldPosition = WorldPosition[A];
B.Ambient = Ambient[A].rgb;
B.Diffuse = Diffuse[A].rgb;
B.Specular = Specular[A].rgb;
B.SpotDirection = SpotDirection[A].xyz;
B.SpotExponent = SpotExponent[A];
B.ConstantAtten = ConstantAtten[A];
B.LinearAtten = LinearAtten[A];
B.QuadAtten = QuadAtten[A];
B.SpotCosCutoff = SpotCosCutoff[A];
return B;
}

Get block-offset for each uniform


uniform: vec4 WorldPosition Block offset: 0
uniform: vec4 Ambient Block offset: 128
uniform: vec4 Diffuse Block offset: 256
uniform: vec4 Specular Block offset: 384
uniform: vec4 SpotDirection Block offset: 512
uniform: float SpotExponent Block offset: 640
uniform: float ConstantAtten Block offset: 768
uniform: float LinearAtten Block offset: 896
uniform: float QuadAtten Block offset: 1024
uniform: float SpotCosCutoff Block offset: 1152

Make similar structure for host


const
MAX_HARDWARE_LIGHT = 8;
TLightSourceState = packed record
Position: array[0..MAX_HARDWARE_LIGHT-1] of TVector;
Ambient: array[0..MAX_HARDWARE_LIGHT-1] of TVector;
Diffuse: array[0..MAX_HARDWARE_LIGHT-1] of TVector;
Specular: array[0..MAX_HARDWARE_LIGHT-1] of TVector;
SpotDirection: array[0..MAX_HARDWARE_LIGHT-1] of TAffineVector;
Space1: array[0..MAX_HARDWARE_LIGHT-1] of Single;
SpotExponent: array[0..MAX_HARDWARE_LIGHT-1] of Single;
Space2: array[0..MAX_HARDWARE_LIGHT-1] of TAffineVector;
ConstantAtten: array[0..MAX_HARDWARE_LIGHT-1] of Single;
Space3: array[0..MAX_HARDWARE_LIGHT-1] of TAffineVector;
LinearAtten: array[0..MAX_HARDWARE_LIGHT-1] of Single;
Space4: array[0..MAX_HARDWARE_LIGHT-1] of TAffineVector;
QuadAtten: array[0..MAX_HARDWARE_LIGHT-1] of Single;
Space5: array[0..MAX_HARDWARE_LIGHT-1] of TAffineVector;
SpotCosCutoff: array[0..MAX_HARDWARE_LIGHT-1] of Single;
Space6: array[0..MAX_HARDWARE_LIGHT-1] of TAffineVector;
end;

Check host and device block size - it's both equal 1280 byte.

In result
properly read only first light, with other looks like properties fetched with wrong offset
not GL error, no shader error

YarUnderoaker
12-07-2010, 04:46 AM
Ops :o
I made mistake in host structure. All member must be TVector (vec4)

All right. Multiple vector arrays in uniform block work as need.
Thanks to Dark Photon for idea.

I make two test (on Cedar VGA). Data was update once per frame (1280 byte)
1) uniform arrays without block - 220 fps
2) uniform arrays in block - 270 fps

YarUnderoaker
12-07-2010, 06:52 AM
Mmm, I packed scalar properties to vectors, that reduce block size to 896 byte and increase FPS to 300 for four lights. For one light it value ~550. Flooding off in topic...