glDrawElements with multi indexing buffers

Hello everybody,
I have dealed with OpenGL 3.3 by the help of my old experiences in former versions of it for a few days and with long breaks.
I have wrote an OBJ loader function in C++ with SDL for things like window management, event handling and binding OpenGL context.
I used glDrawElements to reach my aim of loading an .obj model from any filepatch and drawing it in correct way as values the program got from the file are correct and match with buffer values.
The snippet I typed is:


GLuint LoadOBJ(char *filepath,GLuint *mdl_vb,GLuint *mdl_tb,GLuint *mdl_nb,GLuint *mdl_vidb,GLuint *mdl_tidb,GLuint *mdl_nidb)
{
	char a,b,c;
	char number[20]="";
	GLuint vertex_id=0,
	   texture_id	=0,
		normal_id	=0,
		  face_id	=0,
				*idptr;
	GLfloat *vertexes=new float[30000],
			*textures=new float[30000],
			*normals =new float[30000],
			*ptr;
	GLuint faces[100000];
	ifstream file;
	file.open(filepath,ios::in);
	if(file.is_open())
	{
		while(!file.eof())
		{
			a=file.get();
			switch(a)
			{
			case '#':
				while((a=file.get())!='
' && ! file.eof());
				break;
			case 'v':
				b=file.get();
				switch(b)
				{
				case ' ':
					ptr=vertexes;
					idptr=&vertex_id;
					break;
				case 't':
					ptr=textures;
					idptr=&texture_id;
					break;
				case 'n':
					ptr=normals;
					idptr=&normal_id;
					break;
				}
				c=file.get();
				while(c!='
' && !file.eof())
				{
					while(c==' ') c=file.get();
					number[0]=c;
					int i=1;
					for( ;(c=file.get())>='-' && c<='9'; i++)
					{
						number[i]=c;
					}
					number[i]='\0';
					*(ptr+(*idptr))=strtofloat(number);
					(*idptr)++;
				}
				break;
			case 'f':
				c=file.get();
				while(c!='
' && !file.eof())
				{
					while(c==' ' || c=='/') c=file.get();
					number[0]=c;
					int i=1;
					for( ;(c=file.get())>='0' && c<='9'; i++)
					{
						number[i]=c;
					}
					number[i]='\0';
					*(faces+face_id)=(GLuint)strtoint(number);
					face_id++;
				}
				break;
			case '
':
				break;
			default:
				while((a=file.get())!='
' && ! file.eof());
				break;
			}
		}
	}
	file.close();

	GLuint model_vb,model_tb,model_nb,model_vidb,model_tidb,model_nidb;

	glGenBuffers(1,&model_vb);
	glBindBuffer(GL_ARRAY_BUFFER,model_vb);
	glBufferData(GL_ARRAY_BUFFER,sizeof(GLfloat)*(vertex_id),vertexes,GL_STATIC_DRAW);

	glGenBuffers(1,&model_tb);
	glBindBuffer(GL_ARRAY_BUFFER,model_tb);
	glBufferData(GL_ARRAY_BUFFER,sizeof(GLfloat)*(texture_id),textures,GL_STATIC_DRAW);

	glGenBuffers(1,&model_nb);
	glBindBuffer(GL_ARRAY_BUFFER,model_nb);
	glBufferData(GL_ARRAY_BUFFER,sizeof(GLfloat)*(normal_id),normals,GL_STATIC_DRAW);

	delete[]vertexes;
	delete[]textures;
	delete[]normals;

	GLuint *vertex_ids =new GLuint[50000],
		   *texture_ids=new GLuint[50000],
		   *normal_ids =new GLuint[50000];
	int size_v,size_t,size_n;
	int i=0;
	for(   ; i<face_id/3; i++)
	{
		 vertex_ids[i]=faces[i*3  ]-1;
		texture_ids[i]=faces[i*3+1]-1;
		 normal_ids[i]=faces[i*3+2]-1;
	}

	glGenBuffers(1,&model_vidb);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,model_vidb);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER,sizeof(GLuint)*i,vertex_ids,GL_STATIC_DRAW);

	glGenBuffers(1,&model_tidb);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,model_tidb);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER,sizeof(GLuint)*i,texture_ids,GL_STATIC_DRAW);

	glGenBuffers(1,&model_nidb);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,model_nidb);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER,sizeof(GLuint)*i,normal_ids,GL_STATIC_DRAW);

	delete []vertex_ids;
	delete []texture_ids;
	delete []normal_ids;
	*mdl_vb  =model_vb;
	*mdl_tb  =model_tb;
	*mdl_nb  =model_nb;
	*mdl_vidb=model_vidb;
	*mdl_tidb=model_tidb;
	*mdl_nidb=model_nidb;	

	return (face_id)/3;
}
void DrawOBJ(GLuint mdl_vb,GLuint mdl_tb,GLuint mdl_nb,GLuint mdl_vidb,GLuint mdl_tidb,GLuint mdl_nidb,GLuint used_vertex_number)
{
	glBindBuffer(GL_ARRAY_BUFFER,mdl_vb);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,mdl_vidb);
	glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);

	glBindBuffer(GL_ARRAY_BUFFER,mdl_tb);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,mdl_tidb);
	glVertexAttribPointer(2,2,GL_FLOAT,GL_FALSE,0,(void*)0);

	glBindBuffer(GL_ARRAY_BUFFER,mdl_nb);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,mdl_nidb);
	glVertexAttribPointer(3,3,GL_FLOAT,GL_FALSE,0,(void*)0);


	glDrawElements(GL_TRIANGLES,used_vertex_number,GL_UNSIGNED_INT,0);
}

Now, the result I got from this is not always as I expect. Sometimes it can accidently give correct shapes with lack of proper texture mapping.
It seems that the reason is not wrong values been taken from the file to buffers or incompatibility from 1-indexed to 0. The problem is that glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,xxx) doesn’t exist for glVertexAttribPointer unlike other GL_ARRAY_BUFFER. Since binding a buffer is only one current buffer related to which buffer we are talking about, last draw function consider only lastest bound buffer before its own. Hence, the last glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,mdl_nidb) will be operated and this index array will be used for vertex,texture,normal buffers. And when I examined my object model file again, I figured out that all indexing of vertex and of normals were same so that the model is drown in correct way as shape but I can’t say same thing for texture indexing which wasn’t binded as last bound buffer and is cause of the disordered mapping.
If I am not wrong, the function of glDrawElements is as I mentioned above. Then what do you suggest to overcome multi-indexing without considering I am right or not?
Excuse for my bad English.
Thanks in advance.

OpenGL does not support different indices for vertex attributes, all vertex attributes have to use the same index. You need to rearrange the attributes and index to satisfy that requirement, see this recent thread for details.

Remember too that the point of using indices is not just to save memory; saving memory is actually a fairly minor factor in overall performance here. The point of using indices is to enable you to draw a mesh (which may be composed of multiple different primitive layouts) in a single glDrawElements call, and to enable your GPU’s vertex cache (for GPUs that have one) to work - both of these will get you much more performance than saving a few KB or MB (except in extreme and unlikely cases where your program may be vertex bound - and in such cases implmenting geometry LOD will more likely be a bigger win anyway).

The point of using indices is to enable you to draw a mesh (which may be composed of multiple different primitive layouts) in a single glDrawElements call

I fail to see how indexed rendering is related to the number of draw calls for “multiple different primitive layouts”. If you change the vertex format, you need a new draw call, and indexed rendering isn’t getting around that.

and to enable your GPU’s vertex cache (for GPUs that have one) to work - both of these will get you much more performance than saving a few KB or MB (except in extreme and unlikely cases where your program may be vertex bound - and in such cases implmenting geometry LOD will more likely be a bigger win anyway).

Your GPU’s vertex cache is based on reuse of vertex data, which requires the ability to know when some vertex data is being reused. And the only way to do that is to make your vertex data smaller, by eliminating repetition of vertices.

So yes, the point of using indices is to save memory. It’s just that saving memory saves other things as well.

Saving memory is why indexed rendering was originally created. All of those other things are built on top of the memory savings as optimizations.

There’s a brand new extension that probably hasn’t made it into any drivers yet that should allow this: http://www.opengl.org/registry/specs/AMD/interleaved_elements.txt

Here’s the bits that are different copy-pasted from the example included in the extension:

    // Index data
    static const unsigned short indices[] =
    {
        0, 0,       // vertex 0: position index, normal index
        1, 0,       // vertex 1: position index, normal index
        2, 0,       // vertex 2: position index, normal index
        1, 0,       // vertex 3: position index, normal index
        2, 0,       // ... six vertices, forming
        3, 0,       // ... two complete triangles, all using normals[0]
        0, 1,
        2, 1,
        3, 1,
        3, 1,
        4, 1,       // ... six more vertices, forming
        2, 1,       // ... two more triangles, all using normals[1]
        // etc...
    };

So it basically requires the indices to be listed together.

    // Okay... here's the new code. Set up vertex attribute 0 (position) to
    // consume the RED channel of the vertex index and attribute 1 (normal)
    // to consume the GREEN (second) channel of the vertex index. Then
    // draw with GL_RG16UI (two channel, 16-bit unsigned int) as the index
    // type.
    glVertexAttribParameteriAMD(0, GL_VERTEX_ELEMENT_SWIZZLE_AMD, GL_RED);
    glVertexAttribParameteriAMD(1, GL_VERTEX_ELEMENT_SWIZZLE_AMD, GL_GREEN);


    glDrawElements(GL_TRIANGLES,
                   sizeof(indices) / (2 * sizeof(unsigned short)),
                   GL_RG16UI,
                   NULL);

Each attribute can be chosen to be drawn from the 1st, 2nd, 3rd or 4th index listed, by specifying GL_RED, GL_GREEN, GL_BLUE or GL_ALPHA when calling glVertexAttribParameteriAMD with GL_VERTEX_ELEMENT_SWIZZLE_AMD.

The type parameter of glDrawElements can be GL_RG8UI, GL_RG16UI or GL_RGBAUI which allow 2x8 bit indices, 2 x 16-bit indices or 4 x 8-bit indices to be used, so that the combined index size doesn’t go above 32-bit.

Not objecting to this, I’m just curious which current GPU don’t have one. Mobile platforms? Can you be specific?

There’s a brand new extension that probably hasn’t made it into any drivers yet that should allow this:

That is a very bizarre extension. It’s just a really odd way of specifying that kind of functionality. It would have made far more sense to just have a variable that says how many indices you use, and a parameter that specifies which indices go to which things, as well as some limitations on what types the DrawElements functions can use with certain index counts.

That way, when the time comes to expand this functionality from the more basic form to something more general-purpose, (like, say, more than 4 indices), you can simply remove some of the limitations. Also, you don’t have the silliness of using “red” and “green” component names for the indices; the indexes are indexed by number.

Why does AMD pick the most absurd ways to specify stuff like this? I mean, between their absolute abuse of glBufferData in AMD_pinned_memory and this, it’s like they really have no understanding of how to write an extension.

I don’t see why they don’t simply allow multiple element array buffers to be bound at a time - at max one per generic attribute. Better put, have a general index buffer that applies to every attribute, unless another index buffer is bound to a specific index which overrides the general index buffer.

Could be something like


// bind buffer for position, tex coords (indices 0 and 2)
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, generalIndices);
// bin buffer for normals (index 1)
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 1, normalIndices);

EDIT: Alternatively, one could go the glVertexAttribPointer way and simply allow multiple regions of an index buffer to be source with a single draw call for all (as it is now) or individual vertex attributes. Would actually be more consistent and not fiddle with the buffer binding restrictions.

If I got the extension correctly, except for the akward specififcation of index masks (which the swizzled uints basically are), if you’re up to more than two differing sets of array, you cannot do any reasonable real-world rendering - except for stupid cases like the cube or very low poly stuff due to the 8 bits left for everything.

I don’t see why they don’t simply allow multiple element array buffers to be bound at a time - at max one per generic attribute.

Presumably, they’re implementing something that they can actually do on currently existing hardware, rather than devising an extension that they can’t actually implement yet. Besides, why would you want to put the indices in different buffers? Wouldn’t the natural inclination be to interleave them like vertex data?

If I got the extension correctly, except for the akward specififcation of index masks (which the swizzled uints basically are), if you’re up to more than two differing sets of array, you cannot do any reasonable real-world rendering - except for stupid cases like the cube or very low poly stuff due to the 8 bits left for everything.

Yes, but how often do you truly need more than 2 index lists? The point is not to make .obj loading simpler. The point is not so that you can just forget about mesh optimization and so forth. The point is to be able to, where appropriate, be able to use a different topology for one set of attributes from a different set.

And again, this is about exposing what existing hardware can do, not designing for future hardware. My issue is that they’re exposing the functionality an unreasonable way (ie: the API is bad).

I was pretty excited to see AMD_interleaved_elements, until I read the implementation and realized I’d be limited to at most 16b indices. And for a per-face, per-point, per-vertex setup, the limit is 8b indices. For such small models, might as well just replicate the data and use a single index list; I’ve only seen the vertex buffer savings really matter in very large meshes (like >1M tris).

While reading the intro I was really hoping for a element buffer increment, so that I could just specify a element chunk of 2 or 3 elements, and then they could all be 32b (or any format). Multiple index buffers would have also been great. Whatever the limitation preventing this, at least someone at AMD is thinking about this. I’d really like to see vertex fetching be a bit more flexible than it is now.

Wouldn’t the natural inclination be to interleave them like vertex data?

I think I got that covered in my edit. It came to me a little too late. :wink:

And again, this is about exposing what existing hardware can do, not designing for future hardware.

True. I don’t know enough about current hardware to state that multiple indices aren’t possible per-se. Just thinking theoretically here.

And for a per-face, per-point, per-vertex setup, the limit is 8b indices.

What’s the difference between a “point” and a “vertex” in this scenario?

Whatever the limitation preventing this, at least someone at AMD is thinking about this.

Given the specific way in which they define this functionality, odds are good that they simply have the ability to do a bit of configurable logic in their vertex fetch unit. So it’s not so much that they’re thinking about it but that someone realized that they could hack the hardware a bit to make a gimped version of the feature possible.

What’s the difference between a “point” and a “vertex” in this scenario?

Points are shared in a mesh between adjacent triangles, whereas vertex data is unique per vertex/triangle pair. Our app allows users to set attribute data at four different levels - geometry (uniform), per-face, per-shared point, and per-vertex. This setup existed long before I came on the scene.

I currently model per-face and per-vertex attributes with a geometry shader and per-face/vertex indexing based on gl_PrimitiveIDIn. The alternative is promoting per-face and per-shared point attributes to per-vertex frequency, which can get very heavy and sluggish for large production models (in the 5M+ polygon range). Using multiple element buffers/indices might improve the performance of this case; at least I’d be interested to try.

Given the specific way in which they define this functionality, odds are good that they simply have the ability to do a bit of configurable logic in their vertex fetch unit. So it’s not so much that they’re thinking about it but that someone realized that they could hack the hardware a bit to make a gimped version of the feature possible
.

That was my suspicion as well. For example, the vertex cache is probably keyed off a single 32b uint. Still, these extensions have a way of morphing into useful features, so I remain hopeful :slight_smile:

OK, I solved the problem. As you said, I needed to rearrange the attributes according to vertex position. I decided what I must do and did by adding the following code:


	for(int i=0; i<face_id/3; i++)
	{
		textures2[vertex_ids[i]*2  ] = textures[texture_ids[i]*2  ];
		textures2[vertex_ids[i]*2+1] = textures[texture_ids[i]*2+1];
		normals2 [vertex_ids[i]*3  ] = normals [normal_ids [i]*3  ];
		normals2 [vertex_ids[i]*3+1] = normals [normal_ids [i]*3+1];
		normals2 [vertex_ids[i]*3+2] = normals [normal_ids [i]*3+2];
	}

After I pass the datas of new arrays to buffers and just used the vertex indices for all as GL_ELEMENT_ARRAY_BUFFER I got the result I want.
Well, I got bored due to engaging in usage of OpenGL so I will create somethings from what I learnt by far to encourage myself by seeing nice results of my work.
Now, I will get rid of having to learn new things by finally adding knowledge of Joystick and Audio in order for creating a complete game or developing a new 2D/3D tool for a while. After becoming familiar to all I typed as needed, I will continue to learning more about 3.3 version of the API again.
Thanks everybody for all responds or struggle to help.

You’re misunderstanding. What I’m talking about is a mesh that may be composed of multiple strips and fans, not changing the vertex format. That’s a “my bad” though as using “primitive type (i.e the mode param to a typical draw command)” would have been clearer.