Thanks for your help guys, but I think those are not the problem in my code. So here it is:
These are the variables used in my code:
/* Interleaved vertex layout matching GL_T2F_N3F_V3F exactly:
 * 2 texture floats, then 3 normal floats, then 3 position floats.
 * Do NOT reorder the fields — glInterleavedArrays() depends on this order. */
typedef struct tsVertex
{
GLfloat u;   /* texture coordinate s */
GLfloat v;   /* texture coordinate t */
GLfloat nx;  /* normal x */
GLfloat ny;  /* normal y */
GLfloat nz;  /* normal z */
GLfloat x;   /* position x */
GLfloat y;   /* position y */
GLfloat z;   /* position z */
} tsVertex;
/* Renderer state for the NV_vertex_array_range path. */
tsVertex *VertexBuffer;                   /* VAR memory from wglAllocateMemoryNV; holds 65536 vertices */
tsVertex *Buffers[TS_RENDERER_BUFF_NUM];  /* per-slot write pointers into VertexBuffer */
GLuint BufferFence[TS_RENDERER_BUFF_NUM]; /* one NV_fence per slot, set after each draw */
GLuint BufferLevel;                       /* index of the next free vertex in VertexBuffer */
GLuint CurrentBuffer;                     /* round-robin slot index into Buffers/BufferFence */
tsVertex VA[4096];                        /* 64x64 source grid built in system memory */
GLushort indices[8064];                   /* 63 row strips * 128 indices each */
This is where I initialize OpenGL:
/* Fixed-function GL setup: perspective projection, depth test,
 * back-face culling, one light, texturing, black clear color. */
glViewport(0, 0, CurrentMode->sWidth, CurrentMode->sHeight);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
/* 45-degree vertical FOV; aspect ratio taken from the current display mode */
gluPerspective(45.0, (GLfloat)CurrentMode->sWidth/(GLfloat)CurrentMode->sHeight, TS_NEAR_CLIPPING_PLANE, TS_FAR_CLIPPING_PLANE);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glClearDepth(1.0);
glDrawBuffer(GL_BACK); /* render to the back buffer (double buffered) */
glShadeModel(GL_SMOOTH);
glEnable(GL_CULL_FACE);
glCullFace(GL_BACK);
glPolygonMode(GL_FRONT, GL_FILL);
glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST);
glHint(GL_FOG_HINT, GL_NICEST);
glEnable(GL_LIGHTING);
glEnable(GL_LIGHT0);
glEnable(GL_TEXTURE_2D);
glClearColor(0.0, 0.0, 0.0, 1.0);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
This is the VAR initialization code:
FlushVertexArrayRangeNV = (PFNGLFLUSHVERTEXARRAYRANGENVPROC)wglGetProcAddress(“glFlushVertexArrayRangeNV”);
VertexArrayRangeNV = (PFNGLVERTEXARRAYRANGENVPROC)wglGetProcAddress(“glVertexArrayRangeNV”);
AllocateMemoryNV = (PFNGWGLALLOCATEMEMORYNVPROC)wglGetProcAddress(“wglAllocateMemoryNV”);
FreeMemoryNV = (PFNGWGLFREEMEMORYNVPROC)wglGetProcAddress(“wglFreeMemoryNV”);
if (!FlushVertexArrayRangeNV | | !VertexArrayRangeNV | | !AllocateMemoryNV | | !FreeMemoryNV)
{
return -1;
}
GenFencesNV = (PFNGLGENFENCESNVPROC)wglGetProcAddress(“glGenFencesNV”);
DeleteFencesNV = (PFNGLDELETEFENCESNVPROC)wglGetProcAddress(“glDeleteFencesNV”);
SetFenceNV = (PFNGLSETFENCENVPROC)wglGetProcAddress(“glSetFenceNV”);
TestFenceNV = (PFNGLTESTFENCENVPROC)wglGetProcAddress(“glTestFenceNV”);
FinishFenceNV = (PFNGLFINISHFENCENVPROC)wglGetProcAddress(“glFinishFenceNV”);
if (!GenFencesNV | | !DeleteFencesNV | | !SetFenceNV | | !TestFenceNV | | !FinishFenceNV)
{
return -1;
}
VertexBuffer = (tsVertex *)AllocateMemoryNV(sizeof(tsVertex) * 65536, 0.2f, 0.2f, 0.7f);
if (VertexBuffer == NULL)
{
return -1;
}
VertexArrayRangeNV(sizeof(tsVertex) * 65536, VertexBuffer);
Buffers[0] = VertexBuffer;
BufferLevel = 0;
CurrentBuffer = 0;
glEnableClientState(GL_VERTEX_ARRAY_RANGE_NV);
glInterleavedArrays(GL_T2F_N3F_V3F, 0, Buffers[0]);
for (i = 0; i < TS_RENDERER_BUFF_NUM; i++)
GenFencesNV(1, &(BufferFence[i]));
This is the rendering function:
/*
 * Copy VNumber vertices into the next free region of the VAR buffer and
 * draw them as one indexed triangle strip.
 *
 * VArray  - source vertices in system memory (not modified)
 * VNumber - number of vertices to upload (must be <= 65536)
 * Indices - index array, values relative to the copied vertex block
 * INumber - number of indices to draw
 *
 * FIX: removed unused locals `int i, j;`.
 */
void RenderArray(tsVertex *VArray, GLsizei VNumber, GLushort *Indices, GLsizei INumber)
{
    /* Wait until the GPU has finished the draw that last signalled this
     * slot's fence before overwriting vertex memory. */
    if (!TestFenceNV(BufferFence[CurrentBuffer]))
        FinishFenceNV(BufferFence[CurrentBuffer]);
    /* Wrap to the start of the buffer when the tail is too small.
     * NOTE(review): after a wrap, the region at the buffer start is only
     * protected if the fence rotation lines up with the buffer offsets —
     * verify TS_RENDERER_BUFF_NUM against the per-draw vertex count. */
    if (BufferLevel + VNumber > 65536)
        BufferLevel = 0;
    Buffers[CurrentBuffer] = &(VertexBuffer[BufferLevel]);
    memcpy(Buffers[CurrentBuffer], VArray, sizeof(tsVertex) * VNumber);
    glInterleavedArrays(GL_T2F_N3F_V3F, 0, Buffers[CurrentBuffer]);
    glDrawElements(GL_TRIANGLE_STRIP, INumber, GL_UNSIGNED_SHORT, Indices);
    /* Mark this slot busy; the fence signals when the draw completes. */
    SetFenceNV(BufferFence[CurrentBuffer], GL_ALL_COMPLETED_NV);
    BufferLevel += VNumber;
    CurrentBuffer++;
    CurrentBuffer %= TS_RENDERER_BUFF_NUM;
}
Here is the data generation code:
/* Build the 64x64 test grid: vertices spaced 0.1 units apart in the XY
 * plane, every normal pointing along +Z, texture coordinates spanning
 * [0, 1] across the full grid. */
for (i = 0; i < 64; i++)
{
    for (j = 0; j < 64; j++)
    {
        tsVertex *vert = &VA[i * 64 + j];
        vert->x = 0.1f * j;
        vert->y = 0.1f * i;
        vert->z = 0.0f;
        vert->nx = 0.0f;
        vert->ny = 0.0f;
        vert->nz = 1.0f;
        vert->u = j / 63.0f;
        vert->v = i / 63.0f;
    }
}
/* Index the grid as 63 row strips of 128 indices each, alternating a
 * vertex on row i with the vertex directly below it on row i + 1. */
for (i = 0; i < 63; i++)
{
    for (j = 0; j < 64; j++)
    {
        GLushort *pair = &indices[i * 128 + j * 2];
        pair[0] = (GLushort)(i * 64 + j);
        pair[1] = (GLushort)((i + 1) * 64 + j);
    }
}
And finally the rendering loop:
/* Draw the grid 7 times at increasing depth (5 units apart) to generate
 * rendering load for the transfer-rate measurement. */
for (i = 0; i < 7; i++)
{
glLoadIdentity();
Camera->Apply();
glTranslatef(0.0, 0.0, i * (-5.0f));
/* 4096 vertices (64x64 grid), 8064 strip indices per call */
RenderArray(VA, 4096, indices, 8064);
}
The main problem is that I get better performance in normal vertex-array mode than in VAR mode. VAR only becomes faster than normal vertex arrays if I replace the memcpy in RenderArray() with the data-generation code itself, writing directly into the VAR buffer. Is my code correct — meaning this is simply the maximum transfer rate of my MX200 — or am I doing something wrong?
[This message has been edited by Catman (edited 05-10-2002).]