Hello,
I have a big world of approx. 2 million tiles (2D engine). For the graphics, the tiles
use sub-regions of textures to avoid many texture switches.
To scroll through this big world, I split the world into many “render pages”, each the size
of the screen. Each render page knows which tiles belong to it.
So when I scroll through my world, I have to draw at most 5000-6000 tiles (quads) instead of
the whole 2 million tiles. Because I have different texture bindings and different layers,
I built my renderer around several vertex arrays (6 layers and 8 different textures per layer = 48 VAs).
In the main loop of my renderer, I loop through the visible render pages and
store the tile coordinates in my VAs. Then I draw these VAs with glDrawArrays.
Because I refill the VAs dynamically every frame, I get “only” 200 FPS with this approach.
So I decided to use VBOs together with the VAs to get some more performance.
After modifying the code, I got only 50 FPS!
Can somebody explain to me what I have done wrong, and why my renderer with VBOs is
so much slower than my version with only VAs?
Here are parts of the code with only the VAs, running at 200 FPS:
INITIALISATION:
...
...
this->numberOfLayers = 6;
this->numberOfStoragesPerLayer = 4;
this->vertexArray = new VertexArray*[this->numberOfLayers];
for (int l=0; l < this->numberOfLayers; l++)
{
this->vertexArray[l] = new VertexArray[this->numberOfStoragesPerLayer];
for (int z=0; z < this->numberOfStoragesPerLayer; z++)
{
this->vertexArray[l][z].setSizeOfVertexArray(5000 * 12);
this->vertexArray[l][z].setSizeOfColorArray(5000 *16);
this->vertexArray[l][z].setSizeOfTexCoordArray(5000 *8);
}
}
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
...
...
MAINLOOP:
...
...
for (int i=0; i < 4;i++)
{
if (screenX[i] == -1 || screenY[i] == -1)
continue;
vector<Tile> * tiles = this->renderPages[screenX[i]][screenY[i]].getTiles();
for (vector<Tile>::iterator tilesIter=tiles->begin(); tilesIter < tiles->end(); tilesIter++)
{
float layer = tilesIter->layer;
float x = tilesIter->x -(*camX);
float y = tilesIter->y -(*camY);
float width = tilesIter->width;
float height = tilesIter->height;
float rectAX1 = x;
float rectAX2 = x+width;
float rectAY1 = y;
float rectAY2 = y+height;
float rectBX1 = 0.0f;
float rectBX2 = this->screenWidth;
float rectBY1 = 0.0f;
float rectBY2 = this->screenHeight;
if (!(rectAX2 >= rectBX1 &&
rectAX1 <= rectBX2 &&
rectAY2 >= rectBY1 &&
rectAY1 <= rectBY2))
continue;
float colorRed = tilesIter->rgbaColor.red;
float colorGreen = tilesIter->rgbaColor.green;
float colorBlue = tilesIter->rgbaColor.blue;
float colorAlpha = tilesIter->rgbaColor.alpha;
float texLeft = tilesIter->texLeft;
float texTop = tilesIter->texTop;
float texRight = tilesIter->texRight;
float texBottom = tilesIter->texBottom;
int storage = tilesIter->storagePos;
int v = this->vertexArray[(int)layer][storage].v;
int c = this->vertexArray[(int)layer][storage].c;
int t = this->vertexArray[(int)layer][storage].t;
this->vertexArray[(int)layer][storage].vertices[v] = x;
this->vertexArray[(int)layer][storage].vertices[v+1] = y;
this->vertexArray[(int)layer][storage].vertices[v+2] = -layer;
this->vertexArray[(int)layer][storage].vertices[v+3] = x;
this->vertexArray[(int)layer][storage].vertices[v+4] = y + height;
this->vertexArray[(int)layer][storage].vertices[v+5] = -layer;
this->vertexArray[(int)layer][storage].vertices[v+6] = x + width;
this->vertexArray[(int)layer][storage].vertices[v+7] = y + height;
this->vertexArray[(int)layer][storage].vertices[v+8] = -layer;
this->vertexArray[(int)layer][storage].vertices[v+9] = x + width;
this->vertexArray[(int)layer][storage].vertices[v+10] = y;
this->vertexArray[(int)layer][storage].vertices[v+11] = -layer;
this->vertexArray[(int)layer][storage].colors[c] = colorRed;
this->vertexArray[(int)layer][storage].colors[c+1] = colorGreen;
this->vertexArray[(int)layer][storage].colors[c+2] = colorBlue;
this->vertexArray[(int)layer][storage].colors[c+3] = colorAlpha;
this->vertexArray[(int)layer][storage].colors[c+4] = colorRed;
this->vertexArray[(int)layer][storage].colors[c+5] = colorGreen;
this->vertexArray[(int)layer][storage].colors[c+6] = colorBlue;
this->vertexArray[(int)layer][storage].colors[c+7] = colorAlpha;
this->vertexArray[(int)layer][storage].colors[c+8] = colorRed;
this->vertexArray[(int)layer][storage].colors[c+9] = colorGreen;
this->vertexArray[(int)layer][storage].colors[c+10] = colorBlue;
this->vertexArray[(int)layer][storage].colors[c+11] = colorAlpha;
this->vertexArray[(int)layer][storage].colors[c+12] = colorRed;
this->vertexArray[(int)layer][storage].colors[c+13] = colorGreen;
this->vertexArray[(int)layer][storage].colors[c+14] = colorBlue;
this->vertexArray[(int)layer][storage].colors[c+15] = colorAlpha;
this->vertexArray[(int)layer][storage].texCoords[t] = texLeft;
this->vertexArray[(int)layer][storage].texCoords[t+1] = texTop;
this->vertexArray[(int)layer][storage].texCoords[t+2] = texLeft;
this->vertexArray[(int)layer][storage].texCoords[t+3] = texBottom;
this->vertexArray[(int)layer][storage].texCoords[t+4] = texRight;
this->vertexArray[(int)layer][storage].texCoords[t+5] = texBottom;
this->vertexArray[(int)layer][storage].texCoords[t+6] = texRight;
this->vertexArray[(int)layer][storage].texCoords[t+7] = texTop;
this->vertexArray[(int)layer][storage].v += 12;
this->vertexArray[(int)layer][storage].c += 16;
this->vertexArray[(int)layer][storage].t += 8;
}
}
for (int l = this->numberOfLayers-1; l >= 0; l--)
{
for (int i = 0; i < this->numberOfStoragesPerLayer; i++)
{
if (this->vertexArray[l][i].v == 0)
continue;
glVertexPointer(3,GL_FLOAT,0,this->vertexArray[l][i].vertices);
glColorPointer(4,GL_FLOAT,0,this->vertexArray[l][i].colors);
glTexCoordPointer(2,GL_FLOAT,0,this->vertexArray[l][i].texCoords);
glDrawArrays(GL_QUADS,0, (this->vertexArray[l][i].v / 3));
}
}
...
...
And here is the slow VBO version, with only 50 FPS:
INITIALISATION:
...
...
GLenum glewOK = glewInit();
if (glewOK != GLEW_OK)
return;
this->numberOfLayers = 6;
this->numberOfStoragesPerLayer = 8;
this->numberOfMaxQuadPerStorage = 1000;
glGenBuffers(1, this->vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, this->vertexBuffer[0]);
this->bufferSize = ((this->numberOfMaxQuadPerStorage * sizeof(float) * 36) *
this->numberOfStoragesPerLayer *
this->numberOfLayers);
glBufferData(GL_ARRAY_BUFFER, this->bufferSize, NULL, GL_DYNAMIC_DRAW);
vertexIndexer = new int*[this->numberOfLayers];
for (int l=0; l < this->numberOfLayers; l++)
{
vertexIndexer[l] = new int[this->numberOfStoragesPerLayer];
memset(vertexIndexer[l],0, this->numberOfStoragesPerLayer * sizeof(int));
}
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
...
...
MAINLOOP:
...
...
for (int l=0; l < this->numberOfLayers; l++)
memset(vertexIndexer[l],0, this->numberOfStoragesPerLayer * sizeof(int));
float * bufferPtr;
bufferPtr = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
for (int i=0; i < 4;i++)
{
if (screenX[i] == -1 || screenY[i] == -1)
continue;
vector<Tile> * tiles = this->renderPages[screenX[i]][screenY[i]].getTiles();
for (vector<Tile>::iterator tilesIter=tiles->begin(); tilesIter < tiles->end(); tilesIter++)
{
float layer = tilesIter->layer;
float x = tilesIter->x -(*camX);
float y = tilesIter->y -(*camY);
float width = tilesIter->width;
float height = tilesIter->height;
float rectAX1 = x;
float rectAX2 = x+width;
float rectAY1 = y;
float rectAY2 = y+height;
float rectBX1 = 0.0f;
float rectBX2 = this->screenWidth;
float rectBY1 = 0.0f;
float rectBY2 = this->screenHeight;
if (!(rectAX2 >= rectBX1 &&
rectAX1 <= rectBX2 &&
rectAY2 >= rectBY1 &&
rectAY1 <= rectBY2))
continue;
float colorRed = tilesIter->rgbaColor.red;
float colorGreen = tilesIter->rgbaColor.green;
float colorBlue = tilesIter->rgbaColor.blue;
float colorAlpha = tilesIter->rgbaColor.alpha;
float texLeft = tilesIter->texLeft;
float texTop = tilesIter->texTop;
float texRight = tilesIter->texRight;
float texBottom = tilesIter->texBottom;
int storage = tilesIter->storagePos;
int layerOffset = (int)layer * (this->numberOfMaxQuadPerStorage * 36 * this->numberOfStoragesPerLayer);
int storageOffset = layerOffset + (storage * this->numberOfMaxQuadPerStorage * 36);
int tileIndex = storageOffset + (vertexIndexer[(int)layer][storage] * 36);
//1 Vertice
bufferPtr[tileIndex] = x;
bufferPtr[tileIndex + 1] = y;
bufferPtr[tileIndex + 2] = -layer;
bufferPtr[tileIndex + 3] = colorRed;
bufferPtr[tileIndex + 4] = colorGreen;
bufferPtr[tileIndex + 5] = colorBlue;
bufferPtr[tileIndex + 6] = colorAlpha;
bufferPtr[tileIndex + 7] = texLeft;
bufferPtr[tileIndex + 8] = texTop;
//2 Vertice
bufferPtr[tileIndex + 9] = x;
bufferPtr[tileIndex + 10] = y + height;
bufferPtr[tileIndex + 11] = -layer;
bufferPtr[tileIndex + 12] = colorRed;
bufferPtr[tileIndex + 13] = colorGreen;
bufferPtr[tileIndex + 14] = colorBlue;
bufferPtr[tileIndex + 15] = colorAlpha;
bufferPtr[tileIndex + 16] = texLeft;
bufferPtr[tileIndex + 17] = texBottom;
//3 Vertice
bufferPtr[tileIndex + 18] = x + width;
bufferPtr[tileIndex + 19] = y + height;
bufferPtr[tileIndex + 20] = -layer;
bufferPtr[tileIndex + 21] = colorRed;
bufferPtr[tileIndex + 22] = colorGreen;
bufferPtr[tileIndex + 23] = colorBlue;
bufferPtr[tileIndex + 24] = colorAlpha;
bufferPtr[tileIndex + 25] = texRight;
bufferPtr[tileIndex + 26] = texBottom;
//4 Vertice
bufferPtr[tileIndex + 27] = x + width;
bufferPtr[tileIndex + 28] = y;
bufferPtr[tileIndex + 29] = -layer;
bufferPtr[tileIndex + 30] = colorRed;
bufferPtr[tileIndex + 31] = colorGreen;
bufferPtr[tileIndex + 32] = colorBlue;
bufferPtr[tileIndex + 33] = colorAlpha;
bufferPtr[tileIndex + 34] = texRight;
bufferPtr[tileIndex + 35] = texTop;
vertexIndexer[(int)layer][storage]++;
}
}
glUnmapBuffer(GL_ARRAY_BUFFER);
for (int l = this->numberOfLayers-1; l >= 0; l--)
{
for (int i = 0; i < this->numberOfStoragesPerLayer; i++)
{
if (vertexIndexer[l][i] == 0)
continue;
int layerOffset = l * (this->numberOfMaxQuadPerStorage * 36 * this->numberOfStoragesPerLayer *sizeof(float));
int storageOffset = layerOffset + (i * this->numberOfMaxQuadPerStorage * 36 * sizeof(float));
glVertexPointer(3,GL_FLOAT,9*sizeof(float),BUFFER_OFFSET(storageOffset));
glColorPointer(4,GL_FLOAT,9*sizeof(float),BUFFER_OFFSET(storageOffset+3*sizeof(float)));
glTexCoordPointer(2,GL_FLOAT,9*sizeof(float),BUFFER_OFFSET(storageOffset+7*sizeof(float)));
glDrawArrays(GL_QUADS,0, vertexIndexer[l][i]*4);
}
}
...
...
Can somebody see what I am doing wrong, or where my bottleneck could be?
Thanks and Greetz
fbrjogl