I am now trying to batch my sprites together to reduce draw calls. I wrote these functions to sort my sprites from back to front, first by z index and then by texture id.
/* Exchange the two sprite pointers stored at *a and *b. */
static void cg_sprite_swap_func(cg_sprite** a, cg_sprite** b) {
    cg_sprite* const first = *a;
    cg_sprite* const second = *b;

    *a = second;
    *b = first;
}
/*
 * Ordering predicate used by the sprite sort: tells whether sprite `a`
 * must be placed after sprite `b`.
 *
 * Primary key:   z_index, ascending (lower z_index comes first).
 * Secondary key: texture_id, descending (for equal z_index, the sprite
 *                with the higher texture_id comes first).
 *
 * Returns 1 when `a` belongs after `b`, and 0 otherwise -- including when
 * both keys are equal. Ties must yield 0: the caller tests the result for
 * truthiness, so any non-zero tie value would swap equal sprites on every
 * pass and defeat the sort's "no swaps -> done" early exit.
 */
static int greater(cg_sprite*a, cg_sprite*b) {
    if (a->z_index != b->z_index) {
        return a->z_index > b->z_index;
    }
    if (a->texture_id != b->texture_id) {
        return a->texture_id < b->texture_id;
    }
    return 0; /* same z_index and same texture_id: keep current order */
}
/*
 * Sort an array of sprite pointers in place, using the order defined by
 * greater(): by z_index first, then by texture_id for equal z_index.
 *
 * a     - array of `count` sprite pointers; reordered in place.
 * count - number of entries in `a`. Nothing is done when `a` is null or
 *         there are fewer than two entries.
 *
 * This is a bubble sort with an early exit once a full pass performs no
 * swap. Only adjacent entries that are strictly out of order are
 * exchanged, so sprites that compare equal keep their relative order.
 */
void cg_sprite_back_2_front_tex_id(cg_sprite** a, int count) {
    if (!a || count < 2) {
        return;
    }

    for (int pass = 0; pass < count - 1; pass++) {
        int swapped = 0;

        /* After each pass the last `pass` entries are already in place. */
        for (int j = 0; j < count - pass - 1; j++) {
            /* Strictly positive only: a tie must never trigger a swap. */
            if (greater(a[j], a[j + 1]) > 0) {
                cg_sprite_swap_func(&a[j], &a[j + 1]);
                swapped = 1;
            }
        }

        if (!swapped) {
            break; /* already sorted */
        }
    }
}
I am currently updating my sprites like this:
// Sort the sprite pointers first, then rebuild every sprite's model matrix
// and its four CPU-transformed vertices (ov0..ov3). sc is the sprite count.
cg_sprite_back_2_front_tex_id(sprites, sc);
for (int i = 0; i < sc; i++) {
sp = sprites[i];
// Reset the scratch transforms before building this sprite's matrices.
vmathT3MakeIdentity(&rot);
vmathT3MakeIdentity(&scal);
vmathT3MakeIdentity(&trns);
vmathT3MakeIdentity(&tmp);
// Build separate scale, rotation and translation transforms from the sprite's fields.
vmathT3MakeScale(&scal, &sp->scale);
vmathT3MakeRotationZYX(&rot, &sp->angl);
vmathT3MakeTranslation(&trns, &sp->pos);
vmathT3Mul(&tmp, &trns, &scal); // author's intent: scale, then translate (presumably tmp = trns * scal)
vmathT3Mul(&tmp, &tmp, &rot); // author's intent: scale, translate, then rotate. NOTE(review): if vmathT3Mul computes arg1 * arg2, rot is the right-most factor and is applied to a vertex first -- confirm the intended order
// Store the combined transform as the sprite's model matrix (sprites[i] is the same pointer as sp).
vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);
// Presumably loads the quad's four input vertices into iv0..iv3 -- confirm against cg_quad_getquadverts.
cg_quad_getquadverts(&sp->iv0, &sp->iv1, &sp->iv2, &sp->iv3, sp->quad);
// Presumably ovN = m_mat * ivN: the CPU-side transformed vertices later copied into the vertex buffer.
vmathM4MulV4(&sp->ov0, &sp->m_mat, &sp->iv0);
vmathM4MulV4(&sp->ov1, &sp->m_mat, &sp->iv1);
vmathM4MulV4(&sp->ov2, &sp->m_mat, &sp->iv2);
vmathM4MulV4(&sp->ov3, &sp->m_mat, &sp->iv3);
}
After this, all the sprites are sorted, updated, and transformed on the CPU, ready to be sent to the GPU (`sc` is the sprite count). After all that work, I fall back to rendering them one at a time.
// Render pass: one vertex upload and one draw call per sprite (no batching yet).
for (int i = 0; i < sc; i++) {
sp = sprites[i];
// Restart at the beginning of v_buff, so it only ever holds the current sprite's 4 vertices.
idx = 0;
// Layout written below, 9 values per vertex: position x,y,z; 4 color values; 2 texture coordinates.
// v0: position ov0, colors[0..3], tex_coords[0..1]
v_buff[idx++] = sp->ov0.x;
v_buff[idx++] = sp->ov0.y;
v_buff[idx++] = sp->ov0.z;
v_buff[idx++] = sp->quad->colors[0];
v_buff[idx++] = sp->quad->colors[1];
v_buff[idx++] = sp->quad->colors[2];
v_buff[idx++] = sp->quad->colors[3];
v_buff[idx++] = sp->quad->tex_coords[0];
v_buff[idx++] = sp->quad->tex_coords[1];
// v1: position ov1, colors[4..7], tex_coords[2..3]
v_buff[idx++] = sp->ov1.x;
v_buff[idx++] = sp->ov1.y;
v_buff[idx++] = sp->ov1.z;
v_buff[idx++] = sp->quad->colors[4];
v_buff[idx++] = sp->quad->colors[5];
v_buff[idx++] = sp->quad->colors[6];
v_buff[idx++] = sp->quad->colors[7];
v_buff[idx++] = sp->quad->tex_coords[2];
v_buff[idx++] = sp->quad->tex_coords[3];
// v2: position ov2, colors[8..11], tex_coords[4..5]
v_buff[idx++] = sp->ov2.x;
v_buff[idx++] = sp->ov2.y;
v_buff[idx++] = sp->ov2.z;
v_buff[idx++] = sp->quad->colors[8];
v_buff[idx++] = sp->quad->colors[9];
v_buff[idx++] = sp->quad->colors[10];
v_buff[idx++] = sp->quad->colors[11];
v_buff[idx++] = sp->quad->tex_coords[4];
v_buff[idx++] = sp->quad->tex_coords[5];
// v3: position ov3, colors[12..15], tex_coords[6..7]
v_buff[idx++] = sp->ov3.x;
v_buff[idx++] = sp->ov3.y;
v_buff[idx++] = sp->ov3.z;
v_buff[idx++] = sp->quad->colors[12];
v_buff[idx++] = sp->quad->colors[13];
v_buff[idx++] = sp->quad->colors[14];
v_buff[idx++] = sp->quad->colors[15];
v_buff[idx++] = sp->quad->tex_coords[6];
v_buff[idx++] = sp->quad->tex_coords[7];
// NOTE(review): the program, VAO, blend/depth state and texture unit set below do not
// depend on sp; they are re-applied on every iteration and could be set once before the loop.
glUseProgram(ce_get_default_shader()->shader_program);
glBindVertexArray(vao);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
// NOTE(review): glClearDepth only sets the value used by a later glClear(GL_DEPTH_BUFFER_BIT); nothing is cleared here.
glClearDepth(1.0f);
glActiveTexture(GL_TEXTURE0);
glUniform1i(tex_loc, 0);
// Per-sprite state: the sprite's texture. A change in texture_id is the natural batch boundary.
glBindTexture(GL_TEXTURE_2D, sp->texture_id);
// Camera matrices are fetched again for every sprite.
cg_cam_get_matrices(&v_mat, &p_mat, &mvp_mat, ce_get_default_camera());
// NOTE(review): ov0..ov3 copied into v_buff above were already multiplied by sp->m_mat in the
// update loop; if the shader also applies the model matrix, the transform is applied twice -- confirm against the shader.
glUniformMatrix4fv(model_mat_loc, 1, GL_FALSE,
vmathM4GetData(&sp->m_mat));
glUniformMatrix4fv(view_mat_loc, 1, GL_FALSE, vmathM4GetData(v_mat));
glUniformMatrix4fv(proj_mat_loc, 1, GL_FALSE, vmathM4GetData(p_mat));
glUniformMatrix4fv(mvp_matrix_loc, 1, GL_FALSE,
vmathM4GetData(mvp_mat));
// Upload the vertex data. NOTE(review): only 36 values of v_buff were written this iteration;
// vbo_size_in_bytes is defined outside this snippet -- confirm it matches what was written.
glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
glBufferData(GL_ARRAY_BUFFER, (vbo_size_in_bytes), v_buff,
GL_STREAM_DRAW);
// The index buffer is re-uploaded on every iteration, sized for sc sprites.
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_ind(),
i_buff, GL_STREAM_DRAW);
// NOTE(review): the element count passed here is sc * cg_sprite_get_vert_count(), although only one
// sprite's vertices were refreshed this iteration; glDrawElements expects the number of indices to draw -- confirm.
glDrawElements(GL_TRIANGLES, sc * cg_sprite_get_vert_count(),
GL_UNSIGNED_SHORT, 0);
glBindVertexArray(0);
debug_opengl("render loop");
}
The thing is, the data is currently organized in such a way that I should be able to batch the sprites and render them in groups larger than one at a time. The basic algorithm I have is: check whether the current sprite's texture ID is the same as the previous sprite's texture ID. If it is, keep adding sprite data to the buffer; if it is not, send the buffer to the GPU, draw it with the currently bound texture, and then start a new batch with the new texture ID.
Is this the way that geometry batching is implemented?
Are there any examples that anyone can share?