building a geometry buffer

blubee · January 11, 2016, 11:47am

I am having some trouble building a buffer and sending the data over to the gpu in a single draw call instead of multiple draw calls.

I might be having issues in multiple places, but the place I am most suspicious of is where I concatenate the smaller buffers into a larger one. The VBO setup looks like this.

this is the quad and sprite data structures.


/* Geometry and per-vertex attribute data for a single quad. */
typedef struct {
    vec3 angles;
    GLshort vertex_count;
    GLfloat vertices[12];   /* 12 floats; the attribute setup reads them 3 per vertex */
    GLfloat colors[16];     /* 16 floats; the attribute setup reads them 4 per vertex */
    GLshort indices[6];     /* 6 element indices */
    GLfloat tex_coords[8];  /* 8 floats; presumably 2 per vertex -- TODO confirm */
} cg_quad;

/* A sprite: a quad plus the inputs the render loop uses to place it. */
typedef struct sprite {
    cg_quad* quad;      /* source of vertices, colours, indices and tex coords */
    vec3 scale;         /* passed to vmathT3MakeScale by the render loop */
    vec3 pos;           /* passed to vmathT3MakeTranslation */
    vec3 angl;          /* passed to vmathT3MakeRotationZYX */
    mat4 m_mat;         /* model matrix; rebuilt from scale/angl/pos by the render loop */
    GLuint texture_id;  /* presumably a GL texture name; unused in the code shown */
}cg_sprite;

this is how i init my vbo and try to setup the gpu memory for the amount of sprites that I want to draw in one call.


    // CPU-side staging arrays. Per sprite: 12 position floats, 16 colour floats
    // and 8 texture-coordinate floats (the cg_quad array sizes). The element
    // size is sizeof(float); the arrays hold floats, not pointers.
    v_buff = (float*)calloc((size_t)SPRITE_COUNT * 12, sizeof(float));
    c_buff = (float*)calloc((size_t)SPRITE_COUNT * 16, sizeof(float));
    t_buff = (float*)calloc((size_t)SPRITE_COUNT * 8, sizeof(float));

    // glBindVertexArray() expects a name returned by glGenVertexArrays();
    // glGenBuffers() only hands out buffer-object names.
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);

    // Attribute 0: positions, 3 tightly packed floats per vertex.
    glEnableVertexAttribArray(0);
    glGenBuffers(1, &vert_buff);
    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    // Reserve room for SPRITE_COUNT quads (SPRITE_COUNT is 2 atm) without
    // uploading anything. sprites[0]->quad->vertices holds one quad only, so
    // using it as the source for SPRITE_COUNT quads would read past its end.
    // The real contents are uploaded from v_buff before the draw call.
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->vertices),  // 12 floats per quad
                 NULL, GL_STREAM_DRAW);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat),
                          (GLvoid*)0);

    // Attribute 1: colours, 4 tightly packed floats per vertex.
    glEnableVertexAttribArray(1);
    glGenBuffers(1, &col_buff);
    glBindBuffer(GL_ARRAY_BUFFER, col_buff);
    // Same as above: size only, contents come from c_buff before drawing.
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->colors),  // 16 floats per quad
                 NULL, GL_STREAM_DRAW);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat),
                          (GLvoid*)0);

    // Element buffer: uploaded once with the 6 indices of the first quad.
    glGenBuffers(1, &ind_buff);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(sprites[0]->quad->indices),  // 6 shorts
                 sprites[0]->quad->indices, GL_STATIC_DRAW);

    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

/*
 * Copy num_to_copy floats from src into dest.
 *
 * dest_index and src_index are BYTE offsets from the start of their buffers,
 * not element indices: the callers pass values such as i * 48 for records of
 * 12 floats. num_to_copy is a count of floats. The two regions must not
 * overlap (memcpy is used).
 *
 * The offsets are applied through unsigned char pointers because indexing a
 * void pointer is not standard C; it only works as a compiler extension.
 */
void copy_float_buffer(void* dest, size_t dest_index, void* src,
                       size_t src_index, size_t num_to_copy) {
    unsigned char* out = (unsigned char*)dest + dest_index;
    const unsigned char* in = (const unsigned char*)src + src_index;

    memcpy(out, in, num_to_copy * sizeof(float));
}

// Per-sprite pass: rebuild the model matrix, transform the quad's four
// vertices on the CPU, then copy the results into the shared v_buff/c_buff.
for (int i = 0; i < SPRITE_COUNT; i++) {
        vmathT3MakeIdentity(&rot);
        vmathT3MakeIdentity(&scal);
        vmathT3MakeIdentity(&trns);
        vmathT3MakeIdentity(&tmp);

        vmathT3MakeScale(&scal, &sprites[i]->scale);
        vmathT3MakeRotationZYX(&rot, &sprites[i]->angl);
        vmathT3MakeTranslation(&trns, &sprites[i]->pos);
        vmathT3Mul(&tmp, &trns, &scal);  // scale then translate
        vmathT3Mul(&tmp, &tmp, &rot);    // scale then translate then rotate

        vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);

        // Grab the 4 vertices of the quad, then multiply each one by the
        // model matrix on the CPU.
        cg_quad_getquadverts(&iv0, &iv1, &iv2, &iv3, sprites[i]->quad);
        vmathM4MulV4(&ov0, &sprites[i]->m_mat, &iv0);
        vmathM4MulV4(&ov1, &sprites[i]->m_mat, &iv1);
        vmathM4MulV4(&ov2, &sprites[i]->m_mat, &iv2);
        vmathM4MulV4(&ov3, &sprites[i]->m_mat, &iv3);
        // Puts the transformed vertices into the temporary array v_buffer
        // as x,y,z,x1,y1,z1,etc.
        cg_quad_fillvertsbuff(v_buffer, &ov0, &ov1, &ov2, &ov3);

        cg_quad_fillcolsbuff(c_buffer, sprites[i]->quad);                // same for colors
        cg_quad_filltexbuff(t_buffer, sprites[i]->quad);                   // and textures

        // Copy the small per-sprite buffers into the larger ones that are
        // sent to the GPU. The offsets passed here are byte offsets
        // (sizeof(float) * i * 12).
        // NOTE(review): the source offset also grows with i, although
        // v_buffer/c_buffer are refilled for every sprite above -- confirm
        // they hold more than one quad, otherwise it should be 0.
        // NOTE(review): the colour copy reuses the vertex numbers (12), while
        // cg_quad.colors holds 16 floats.
        copy_float_buffer(v_buff, (sizeof(float) * i) * 12, v_buffer,
                          (sizeof(float) * i) * 12, 12);
        copy_float_buffer(c_buff, (sizeof(float) * i) * 12, c_buffer,
                          (sizeof(float) * i) * 12, 12);
}


//then setup my matrices and send them over to the gpu
    glUniformMatrix4fv(view_mat_loc, 1, GL_FALSE, vmathM4GetData(&v_mat));
    glUniformMatrix4fv(proj_mat_loc, 1, GL_FALSE, vmathM4GetData(&p_mat));
    glUniformMatrix4fv(mvp_matrix_loc, 1, GL_FALSE, vmathM4GetData(&mvp_mat));

    // Upload the concatenated positions; glBufferData replaces the buffer's
    // existing data store with a new one filled from v_buff.
    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), v_buff,
                 GL_STREAM_DRAW);

    // Same for the concatenated colours.
    glBindBuffer(GL_ARRAY_BUFFER, col_buff);
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->colors), c_buff,
                 GL_STREAM_DRAW);

//draw
    // NOTE(review): this asks for SPRITE_COUNT * 6 indices, but the setup code
    // uploaded only one quad's 6 indices into ind_buff, and no
    // glBindVertexArray(vao) is visible before this call -- confirm both.
    glDrawElements(GL_TRIANGLES, SPRITE_COUNT * 6, GL_UNSIGNED_SHORT, 0);

    glBindVertexArray(0);

Currently this code still only draws 1 quad. Can you take a look at where I set up the size of the GPU memory: am I doing that part correctly? Could I build the large buffer in a more efficient way? Is my drawing command correct? Does anything stand out that would explain why it is still drawing only 1 quad instead of two?

GClements · January 11, 2016, 5:03pm

blubee;1281097:


    glBufferData(GL_ARRAY_BUFFER,                      //SRITE_COUNT is 2 atm.
                 SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), //is this the correct way to do this?
                 sprites[0]->quad->vertices, GL_STREAM_DRAW);

No it isn’t the correct way to do it. sprites[0]->quad->vertices only points to the vertices of a single quad. The data for the second quad doesn’t follow it.

But why are you even trying to upload the data here when you overwrite it with v_buff just before the draw call?

Note that it doesn’t matter what’s in the buffer (or whether it even has a data store) at the point that you call glVertexAttribPointer().

Also:

blubee;1281097:


        copy_float_buffer(v_buff, (sizeof(float) * i) * 12, v_buffer,  //i want to copy those 2 small buffers
                          (sizeof(float) * i) * 12, 12);                               //into the larger one to send to the gpu
        copy_float_buffer(c_buff, (sizeof(float) * i) * 12, c_buffer,
                          (sizeof(float) * i) * 12, 12);

Shouldn’t the source index be zero? Do v_buffer/c_buffer even contain more than one sprite’s worth of data?

blubee · January 11, 2016, 10:20pm

Are you saying that when I set up my VBO initially it should point to the v_buff memory? I do overwrite the position of sprite[0]->quad->vertices because the actual memory location will be in v_buff. Its storage space is calculated like this:


    glBufferData(GL_ARRAY_BUFFER, SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), sprites[0]->quad->vertices, GL_STREAM_DRAW);

SPRITE_COUNT *  sizeof(sprites[0]->quad->vertices)  = 96;  2 * 48;

When setting up the vbo after I set the size of the space required, then I should pass the actual cpu memory location of the data? In that case it should look like this:


    glBufferData(GL_ARRAY_BUFFER, SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), v_buff, GL_STREAM_DRAW);

I made those edits above and now my vbo init code looks like this:


    // glBindVertexArray() expects a name returned by glGenVertexArrays();
    // glGenBuffers() only hands out buffer-object names.
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);


    // Attribute 0: positions, 3 tightly packed floats per vertex.
    // 48 is the byte size of one quad's 12 position floats.
    glEnableVertexAttribArray(0);
    glGenBuffers(1, &vert_buff);
    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    glBufferData(GL_ARRAY_BUFFER, SPRITE_COUNT * 48, v_buff, GL_STREAM_DRAW);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat), (GLvoid*)0);


    // Attribute 1: colours, 4 tightly packed floats per vertex.
    // 64 is the byte size of one quad's 16 colour floats.
    glEnableVertexAttribArray(1);
    glGenBuffers(1, &col_buff);
    glBindBuffer(GL_ARRAY_BUFFER, col_buff);
    glBufferData(GL_ARRAY_BUFFER, SPRITE_COUNT * 64, c_buff, GL_STREAM_DRAW);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), (GLvoid*)0);


    // Element buffer: holds the 6 indices of the first quad only.
    glGenBuffers(1, &ind_buff);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(sprites[0]->quad->indices),
                 sprites[0]->quad->indices, GL_STREAM_DRAW);


    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

this code will start the index at zero and loop the num_sprites. It’s in a for loop;


for(int i = 0; i < SPRITE_COUNT; i++) {
        // take each vertex, multiply it by the matrix, pack each vertex into a v_buffer. Take each v_buffer and concat it to the v_buff with enough storage for sizeof(sprite[i]->quad->vertices) * SPRITE_COUNT;


        //this code will scale to the amount of sprites in the loop. I simplified it a bit, it copies 48 bytes to the destination buffer from the src buffer, this should happen SPRITE_COUNT.
        copy_float_buffer(v_buff, i * 48, v_buffer, 0, 12); //0th iteration copies 12 float or 48 bytes  to v_buff offset 0; 1th iteration copies 12 float to v_buffer at offset 48
        copy_float_buffer(c_buff, i * 64, c_buffer, 0, 16);

here’s the full loop once more.


    // Per-sprite pass: rebuild the model matrix, transform the four quad
    // vertices on the CPU and copy them (48 bytes) plus the colours (64 bytes)
    // into slot i of v_buff / c_buff.
    for (int i = 0; i < SPRITE_COUNT; i++) {
        vmathT3MakeIdentity(&rot);
        vmathT3MakeIdentity(&scal);
        vmathT3MakeIdentity(&trns);
        vmathT3MakeIdentity(&tmp);


        vmathT3MakeScale(&scal, &sprites[i]->scale);
        vmathT3MakeRotationZYX(&rot, &sprites[i]->angl);
        vmathT3MakeTranslation(&trns, &sprites[i]->pos);
        vmathT3Mul(&tmp, &trns, &scal);  // scale then translate
        vmathT3Mul(&tmp, &tmp, &rot);    // scale then translate then rotate


        vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);


        cg_quad_getquadverts(&iv0, &iv1, &iv2, &iv3, sprites[i]->quad);
        vmathM4MulV4(&ov0, &sprites[i]->m_mat, &iv0);
        vmathM4MulV4(&ov1, &sprites[i]->m_mat, &iv1);
        vmathM4MulV4(&ov2, &sprites[i]->m_mat, &iv2);
        vmathM4MulV4(&ov3, &sprites[i]->m_mat, &iv3);


        cg_quad_fillvertsbuff(v_buffer, &ov0, &ov1, &ov2, &ov3); //I think I am making my error here, I should directly copy from the object directly into the larger buffer, saving 1 memcpy per loop, but I am not sure.
        cg_quad_fillcolsbuff(c_buffer, sprites[i]->quad);  
        cg_quad_filltexbuff(t_buffer, sprites[i]->quad);


        // Destination offsets are in bytes: 48 = 12 floats, 64 = 16 floats.
        copy_float_buffer(v_buff, i * 48, v_buffer, 0, 12);
        copy_float_buffer(c_buff, i * 64, c_buffer, 0, 16);
        
        // NOTE(review): both calls receive the same pointer (v_buff); only the
        // label differs.
        print_buffer(v_buff,   "first vertex");
        print_buffer(v_buff,   "secon vertex");


        // NOTE(review): the model matrix uniform is overwritten on every
        // iteration; with a single draw call after the loop only the last
        // sprite's value would still be set -- confirm the shader's use of it.
        glUniformMatrix4fv(model_mat_loc, 1, GL_FALSE,
                           vmathM4GetData(&sprites[i]->m_mat));
    }

blubee · January 11, 2016, 11:12pm

I wrote this function to try to debug my output. After I transform the vertices on the cpu I print them out like this:


/*
 * Debug helper: print four vectors as the columns v0..v3 of a small table,
 * one row per component (x, y, z, w), preceded by the caller's tag.
 *
 * Despite the name, the arguments are vec4 pointers and the w component is
 * printed too. Values are printed with %3.0f, i.e. rounded to whole numbers
 * (assumes the vec4 members are floating-point -- TODO confirm).
 *
 * The line breaks in the format are written as \n escapes; a string literal
 * cannot contain a raw line break.
 */
void print_vec3(vec4* a, vec4* b, vec4* c, vec4* d, char* tag) {
    printf(
        " %s              \n "
        "      v0 v1 v2 v3\n "
        "-------------------------------------\n"
        "x   %3.0f %3.0f %3.0f %3.0f \n "
        "y   %3.0f %3.0f %3.0f %3.0f \n "
        "z   %3.0f %3.0f %3.0f %3.0f \n "
        "w   %3.0f %3.0f %3.0f %3.0f \n\n\n",
        tag,
        a->x, b->x, c->x, d->x,
        a->y, b->y, c->y, d->y,
        a->z, b->z, c->z, d->z,
        a->w, b->w, c->w, d->w);
}

/*
 * Debug helper: print the first 24 floats of i as two quads of four x,y,z
 * vertices each. Rows x0/y0/z0 show floats 0..11, rows x1/y1/z1 show floats
 * 12..23; the columns are the vertices v0..v3.
 *
 * i must point to at least 24 readable floats. Values are printed with
 * %3.0f, i.e. rounded to whole numbers.
 *
 * The line breaks in the format are written as \n escapes; a string literal
 * cannot contain a raw line break.
 */
void print_vbuff(float* i, char* tag) {
    printf(
        " %s                                  \n"
        "      v0 v1 v2 v3                    \n"
        "-------------------------------------\n"
        "x0   %3.0f %3.0f %3.0f %3.0f \n "
        "y0   %3.0f %3.0f %3.0f %3.0f \n "
        "z0   %3.0f %3.0f %3.0f %3.0f \n"
        "x1   %3.0f %3.0f %3.0f %3.0f \n "
        "y1   %3.0f %3.0f %3.0f %3.0f \n "
        "z1   %3.0f %3.0f %3.0f %3.0f \n\n\n ",
        tag,
        /* first quad */
        i[0], i[3], i[6], i[9],
        i[1], i[4], i[7], i[10],
        i[2], i[5], i[8], i[11],
        /* second quad */
        i[12], i[15], i[18], i[21],
        i[13], i[16], i[19], i[22],
        i[14], i[17], i[20], i[23]);
}

in my code I do this:


    // Per-sprite pass with debug output: transform the quad on the CPU, copy
    // it into slot i of v_buff/c_buff, then print the transformed vertices and
    // the start of v_buff.
    for (int i = 0; i < SPRITE_COUNT; i++) {
        vmathT3MakeIdentity(&rot);
        vmathT3MakeIdentity(&scal);
        vmathT3MakeIdentity(&trns);
        vmathT3MakeIdentity(&tmp);

        vmathT3MakeScale(&scal, &sprites[i]->scale);
        vmathT3MakeRotationZYX(&rot, &sprites[i]->angl);
        vmathT3MakeTranslation(&trns, &sprites[i]->pos);
        vmathT3Mul(&tmp, &trns, &scal);  // scale then translate
        vmathT3Mul(&tmp, &tmp, &rot);    // scale then translate then rotate

        vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);

        cg_quad_getquadverts(&iv0, &iv1, &iv2, &iv3, sprites[i]->quad);
        vmathM4MulV4(&ov0, &sprites[i]->m_mat, &iv0);
        vmathM4MulV4(&ov1, &sprites[i]->m_mat, &iv1);
        vmathM4MulV4(&ov2, &sprites[i]->m_mat, &iv2);
        vmathM4MulV4(&ov3, &sprites[i]->m_mat, &iv3);

        cg_quad_fillvertsbuff(v_buffer, &ov0, &ov1, &ov2, &ov3);
        cg_quad_fillcolsbuff(c_buffer, sprites[i]->quad);
        cg_quad_filltexbuff(t_buffer, sprites[i]->quad);

        // Destination offsets are in bytes: 48 = 12 floats, 64 = 16 floats.
        copy_float_buffer(v_buff, (i*48), v_buffer, 0, 12);
        copy_float_buffer(c_buff, (i*64), c_buffer, 0, 16);

        printf("index:%d ",i);
        // NOTE(review): &ov1 is passed twice and &ov3 is never passed, so the
        // printed columns are ov0, ov1, ov1, ov2 rather than the four vertices.
        print_vec3(&ov0, &ov1, &ov1, &ov2, "output vectors");
        printf("index:%d ",i);
        print_vbuff(v_buff, "v_buffer");

that code gives me the following output


 index:0  output vectors
       v0 v1 v2 v3
 -------------------------------------
x   -50 -50 -50  50
 y   -50  50  50  50
 z     0   0   0   0
 w     1   1   1   1


index:0  v_buffer
      v0 v1 v2 v3
-------------------------------------
x0   -50 -50  50  50
 y0   -50  50  50 -50
 z0     0   0   0   0
 x1   -150 -150 -50 -50
 y1   -100   0   0 -100
 z1     0   0   0   0


 index:1  output vectors
       v0 v1 v2 v3
 -------------------------------------
x   -150 -150 -150 -50
 y   -100   0   0   0
 z     0   0   0   0
 w     1   1   1   1


index:1  v_buffer
      v0 v1 v2 v3
-------------------------------------
x0   -50 -50  50  50
 y0   -50  50  50 -50
 z0     0   0   0   0
 x1   -150 -150 -50 -50
 y1   -100   0   0 -100
 z1     0   0   0   0

So this lets me know for sure that the data is coming out correctly, because if I press keys to move the sprites, the one visible sprite moves and its output vertex positions update accordingly. I am just not sure where things are getting mixed up. Did I set up my VBO data correctly?

GClements · January 12, 2016, 3:38am

Whatever data you want to use needs to be stored in the buffer before the draw call.

You can create the buffer (by binding it) then call glVertexAttribPointer() without first creating a data store with glBufferData(). glVertexAttribPointer() records the name (handle) of the buffer, not its contents.

Do v_buff etc contain valid data at that point? If they don’t, there’s no point in copying it into the buffer.

There are three main ways to get the data into the buffer:

1. Create a data store containing the data by calling glBufferData() with a pointer to the data. If the buffer already has a data store, it will be replaced.
2. Initially create a data store with the correct size but don’t copy any data into it, by calling glBufferData() with a null pointer. Once the data is available, copy it into the existing data store with glBufferSubData().
3. Initially create a data store with the correct size but don’t copy the data into it, by calling glBufferData() with a null pointer. Map the data store with glMapBuffer() then store the data directly in the mapped region (either by copying it there with e.g. memcpy() or by simply creating the data there). Unmap the data store with glUnmapBuffer() before the draw call.

blubee · January 12, 2016, 4:40am

[QUOTE=GClements;1281118]Whatever data you want to used needs to be stored in the buffer before the draw call.

You can create the buffer (by binding it) then call glVertexAttribPointer() without first creating a data store with glBufferData(). glVertexAttribPointer() records the name (handle) of the buffer, not its contents.

Do v_buff etc contain valid data at that point? If they don’t, there’s no point in copying it into the buffer.

There are three main ways to get the data into the buffer:

Create a data store containing the data by calling glBufferData() with a pointer to the data. If the buffer already has a data store, it will be replaced.
Initially create a data store with the correct size but don’t copy any data into it, by calling glBufferData() with a null pointer. Once the data is available, copy it into the existing data store with glBufferSubData().
Initially create a data store with the correct size but don’t copy the data into it, by calling glBufferData() with a null pointer. Map the data store with glMapBuffer() then store the data directly in the mapped region (either by copying it there with e.g. memcpy() or by simply creating the data there). Unmap the data store with glUnmapBuffer() before the draw call.[/QUOTE]

I think right now I am trying to do version number 2:

here’s creating the vbo with a null pointer

//setting up the buffers to hold concat vertex and color data
// CPU-side staging arrays, zero-filled.
// NOTE(review): sizeof(sprites[0]->quad->...) is already a byte count, so the
// extra sizeof(float) / sizeof(short) factor reserves more memory than the
// uploads below use. Harmless, but larger than needed.
v_buff = (float*)calloc(
    1, (sizeof(float) * sizeof(sprites[0]->quad->vertices) * SPRITE_COUNT));
c_buff = (float*)calloc(
    1, (sizeof(float) * sizeof(sprites[0]->quad->colors) * SPRITE_COUNT));
t_buff = (float*)calloc(
    1,
    (sizeof(float) * sizeof(sprites[0]->quad->tex_coords) * SPRITE_COUNT));
i_buff = (short*)calloc(
    1, (sizeof(short) * sizeof(sprites[0]->quad->indices) * SPRITE_COUNT));

// glBindVertexArray() expects a name returned by glGenVertexArrays();
// glGenBuffers() only hands out buffer-object names.
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);

// Attribute 0: positions, 3 tightly packed floats per vertex. The data store
// is created from the (still zero-filled) v_buff.
glEnableVertexAttribArray(0);
glGenBuffers(1, &vert_buff);
glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
glBufferData(GL_ARRAY_BUFFER,
             SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), v_buff,
             GL_STREAM_DRAW);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat),
                      (GLvoid*)0);

// Attribute 1: colours, 4 tightly packed floats per vertex.
glEnableVertexAttribArray(1);
glGenBuffers(1, &col_buff);
glBindBuffer(GL_ARRAY_BUFFER, col_buff);
glBufferData(GL_ARRAY_BUFFER,
             SPRITE_COUNT * sizeof(sprites[0]->quad->colors), c_buff,
             GL_STREAM_DRAW);
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat),
                      (GLvoid*)0);

// Element buffer sized for SPRITE_COUNT quads of 6 indices each.
glGenBuffers(1, &ind_buff);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
             SPRITE_COUNT * sizeof(sprites[0]->quad->indices), i_buff,
             GL_STREAM_DRAW);

glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);

then I create the data and copy it to the data store: v_buff, c_buff, i_buff

/*
 * Render one frame: clear the framebuffer, rebuild every sprite's model
 * matrix, transform its four quad vertices on the CPU, pack positions,
 * colours and indices into v_buff / c_buff / i_buff, upload the three arrays
 * and issue a single glDrawElements call.
 *
 * alpha is not read anywhere in this body.
 */
void variable_render(double alpha) {
    glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    glUseProgram(ce_get_default_shader()->shader_program);
    glBindVertexArray(vao);

    //--------------- update vertex data ---------------------
    for (int i = 0; i < SPRITE_COUNT; i++) {
        vmathT3MakeIdentity(&rot);
        vmathT3MakeIdentity(&scal);
        vmathT3MakeIdentity(&trns);
        vmathT3MakeIdentity(&tmp);

        vmathT3MakeScale(&scal, &sprites[i]->scale);
        vmathT3MakeRotationZYX(&rot, &sprites[i]->angl);
        vmathT3MakeTranslation(&trns, &sprites[i]->pos);
        vmathT3Mul(&tmp, &trns, &scal);  // scale then translate
        vmathT3Mul(&tmp, &tmp, &rot);    // scale then translate then rotate

        vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);


        cg_quad_getquadverts(&i0[i], &i1[i], &i2[i], &i3[i], sprites[i]->quad);
        vmathM4MulV4(&o0[i], &sprites[i]->m_mat, &i0[i]);
        vmathM4MulV4(&o1[i], &sprites[i]->m_mat, &i1[i]);
        vmathM4MulV4(&o2[i], &sprites[i]->m_mat, &i2[i]);
        vmathM4MulV4(&o3[i], &sprites[i]->m_mat, &i3[i]);
        //above multiply each vertex by the model transformation matrix -----
        
        // Sprite i owns floats [i*12, i*12 + 11] of v_buff: x,y,z of each of
        // its four transformed vertices.
        v_buff[(i * 12) + 0] = o0[i].x; //copy over vertex data
        v_buff[(i * 12) + 1] = o0[i].y;
        v_buff[(i * 12) + 2] = o0[i].z;

        v_buff[(i * 12) + 3] = o1[i].x;
        v_buff[(i * 12) + 4] = o1[i].y;
        v_buff[(i * 12) + 5] = o1[i].z;

        v_buff[(i * 12) + 6] = o2[i].x;
        v_buff[(i * 12) + 7] = o2[i].y;
        v_buff[(i * 12) + 8] = o2[i].z;

        v_buff[(i * 12) + 9] = o3[i].x;
        v_buff[(i * 12) + 10] = o3[i].y;
        v_buff[(i * 12) + 11] = o3[i].z;

        // Sprite i owns floats [i*16, i*16 + 15] of c_buff: the quad's 16
        // colour floats copied unchanged.
        c_buff[(i * 16) + 0] = sprites[i]->quad->colors[0]; //color
        c_buff[(i * 16) + 1] = sprites[i]->quad->colors[1];
        c_buff[(i * 16) + 2] = sprites[i]->quad->colors[2];
        c_buff[(i * 16) + 3] = sprites[i]->quad->colors[3];
        c_buff[(i * 16) + 4] = sprites[i]->quad->colors[4];
        c_buff[(i * 16) + 5] = sprites[i]->quad->colors[5];
        c_buff[(i * 16) + 6] = sprites[i]->quad->colors[6];
        c_buff[(i * 16) + 7] = sprites[i]->quad->colors[7];
        c_buff[(i * 16) + 8] = sprites[i]->quad->colors[8];
        c_buff[(i * 16) + 9] = sprites[i]->quad->colors[9];
        c_buff[(i * 16) + 10] = sprites[i]->quad->colors[10];
        c_buff[(i * 16) + 11] = sprites[i]->quad->colors[11];
        c_buff[(i * 16) + 12] = sprites[i]->quad->colors[12];
        c_buff[(i * 16) + 13] = sprites[i]->quad->colors[13];
        c_buff[(i * 16) + 14] = sprites[i]->quad->colors[14];
        c_buff[(i * 16) + 15] = sprites[i]->quad->colors[15];

        // Sprite i owns entries [i*6, i*6 + 5] of i_buff.
        // NOTE(review): the quad's indices are copied unchanged. If every quad
        // stores the same index values (the printout further down in this post
        // shows identical indices for both sprites), every sprite references
        // the same four vertices at the start of the vertex buffers.
        i_buff[(i * 6) + 0] = sprites[i]->quad->indices[0]; //indices
        i_buff[(i * 6) + 1] = sprites[i]->quad->indices[1];
        i_buff[(i * 6) + 2] = sprites[i]->quad->indices[2];

        i_buff[(i * 6) + 3] = sprites[i]->quad->indices[3];
        i_buff[(i * 6) + 4] = sprites[i]->quad->indices[4];
        i_buff[(i * 6) + 5] = sprites[i]->quad->indices[5];

//data has already been copied, this printout shows the data that's in v_buff, c_buff, i_buff
//see below for more on this

        // These run inside the loop, so all three buffers are printed once
        // per sprite on every call.
        print_vbuff(v_buff, SPRITE_COUNT, "v_buffer");
        print_cbuff(c_buff, SPRITE_COUNT, "c_buffer");
        print_ibuff(i_buff, SPRITE_COUNT, "i_buffer");
    }

    vmathM4Mul(&mvp_mat, &p_mat, &v_mat);

    glUniformMatrix4fv(view_mat_loc, 1, GL_FALSE, vmathM4GetData(&v_mat));
    glUniformMatrix4fv(proj_mat_loc, 1, GL_FALSE, vmathM4GetData(&p_mat));
    glUniformMatrix4fv(mvp_matrix_loc, 1, GL_FALSE, vmathM4GetData(&mvp_mat));

//point
    // Each glBufferData call below replaces the buffer's data store with a
    // new one filled from the CPU-side array.
    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), v_buff,
                 GL_STREAM_DRAW);

    glBindBuffer(GL_ARRAY_BUFFER, col_buff);
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->colors), c_buff,
                 GL_STREAM_DRAW);

    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->indices), i_buff,
                 GL_STREAM_DRAW);

    // NOTE(review): the count argument is a number of indices; 6 are written
    // per sprite above, so this is right only if vertex_count is 6 -- confirm.
    glDrawElements(GL_TRIANGLES, SPRITE_COUNT * sprites[0]->quad->vertex_count,
                   GL_UNSIGNED_SHORT, 0);

    glBindVertexArray(0);
}

these are the helper functions that I wrote to check on the data in each buffer.

/*
 * Debug helper: print `count` quads from the packed position array i.
 *
 * Quad k occupies floats [12*k, 12*k + 11], laid out as x,y,z for each of its
 * four vertices. For every quad a small table is printed with the vertices
 * v0..v3 as columns and the rows xk / yk / zk. i must hold at least
 * 12 * count floats. Values are printed with %3.0f (rounded to whole numbers).
 *
 * The line breaks in the formats are written as \n escapes; a string literal
 * cannot contain a raw line break.
 */
void print_vbuff(float* i, int count, char* tag) {
    printf("%s\n", tag);
    for (int k = 0; k < count; k++) {
        printf(
            "      v0 v1 v2 v3                    \n"
            "-------------------------------------\n "
            "x%d   %3.0f %3.0f %3.0f %3.0f \n "
            "y%d   %3.0f %3.0f %3.0f %3.0f \n "
            "z%d   %3.0f %3.0f %3.0f %3.0f \n\n ",
            k, i[(12 * k) + 0], i[(12 * k) + 3], i[(12 * k) + 6], i[(12 * k) + 9],
            k, i[(12 * k) + 1], i[(12 * k) + 4], i[(12 * k) + 7], i[(12 * k) + 10],
            k, i[(12 * k) + 2], i[(12 * k) + 5], i[(12 * k) + 8], i[(12 * k) + 11]);
    }
    printf("\n\n\n");
}

/*
 * Debug helper: print `count` quads from the packed colour array i.
 *
 * Quad k occupies floats [16*k, 16*k + 15], four floats per vertex. For every
 * quad a table is printed with the vertices v0..v3 as columns and one row per
 * colour component. i must hold at least 16 * count floats. Values are
 * printed with %3.0f (rounded to whole numbers).
 *
 * NOTE(review): the fourth row reuses the label "z%d"; it shows the fourth
 * component of each vertex. The label is left as it was so the output matches
 * existing logs.
 *
 * The line breaks in the formats are written as \n escapes; a string literal
 * cannot contain a raw line break.
 */
void print_cbuff(float* i, int count, char* tag) {
    printf("%s\n", tag);
    for (int k = 0; k < count; k++) {
        printf(
            "      v0 v1 v2 v3                    \n"
            "-------------------------------------\n "
            "x%d   %3.0f %3.0f %3.0f %3.0f \n "
            "y%d   %3.0f %3.0f %3.0f %3.0f \n "
            "z%d   %3.0f %3.0f %3.0f %3.0f \n "
            "z%d   %3.0f %3.0f %3.0f %3.0f \n\n ",
            k, i[(16 * k) + 0], i[(16 * k) + 4], i[(16 * k) + 8], i[(16 * k) + 12],
            k, i[(16 * k) + 1], i[(16 * k) + 5], i[(16 * k) + 9], i[(16 * k) + 13],
            k, i[(16 * k) + 2], i[(16 * k) + 6], i[(16 * k) + 10], i[(16 * k) + 14],
            k, i[(16 * k) + 3], i[(16 * k) + 7], i[(16 * k) + 11], i[(16 * k) + 15]);
    }
    printf("\n\n\n");
}

/*
 * Debug helper: print `count` quads from the packed index array i.
 *
 * Quad k occupies entries [6*k, 6*k + 5]. For every quad a table with two
 * columns is printed: the first column shows entries 0,1,2 and the second
 * shows entries 3,4,5 (top to bottom). i must hold at least 6 * count
 * entries.
 *
 * The line breaks in the formats are written as \n escapes; a string literal
 * cannot contain a raw line break.
 */
void print_ibuff(short* i, int count, char* tag) {
    printf("%s\n", tag);
    for (int k = 0; k < count; k++) {
        printf(
            "      v0 v1                          \n"
            "-------------------------------------\n "
            "x%d  %3d %3d \n "
            "y%d  %3d %3d \n "
            "z%d  %3d %3d \n\n ",
            k, i[(6 * k) + 0], i[(6 * k) + 3],
            k, i[(6 * k) + 1], i[(6 * k) + 4],
            k, i[(6 * k) + 2], i[(6 * k) + 5]);
    }
    printf("\n\n\n");
}

and this is an example output after running this program.


v_buffer
      v0 v1 v2 v3
-------------------------------------
 x0   -50 -50  50  50
 y0   -50  50  50 -50
 z0     0   0   0   0

       v0 v1 v2 v3
-------------------------------------
 x1   -50 -50  50  50
 y1   -50  50  50 -50
 z1     0   0   0   0




c_buffer
      v0 v1 v2 v3
-------------------------------------
 x0     1   0   0   1
 y0     0   1   0   1
 z0     0   0   1   0
 z0     1   1   1   1

       v0 v1 v2 v3
-------------------------------------
 x1     1   0   0   1
 y1     0   1   0   1
 z1     0   0   1   0
 z1     1   1   1   1




i_buffer
      v0 v1
-------------------------------------
 x0    0   0
 y0    1   2
 z0    2   3

       v0 v1
-------------------------------------
 x1    0   0
 y1    1   2
 z1    2   3

the output shows that there’s 2 sets of vertex, color and index data. But opengl still only renders 1 quad and I don’t understand why.

GClements · January 12, 2016, 9:59am

blubee;1281119:

here’s creating the vbo with a null pointer


//setting up the buffers to hold concat vertex and color data
v_buff = (float*)calloc(
    1, (sizeof(float) * sizeof(sprites[0]->quad->vertices) * SPRITE_COUNT));


glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
glBufferData(GL_ARRAY_BUFFER,
             SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), v_buff,
             GL_STREAM_DRAW);

v_buff isn’t null at this point. Similarly for the other buffers. But that doesn’t really matter; the data store is getting created, and copying zeros into it doesn’t have any negative effects besides wasting a small amount of time.

blubee;1281119:

then I create the data and copy it to the data store: v_buff, c_buff, i_buff


    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    glBufferData(GL_ARRAY_BUFFER,
                 SPRITE_COUNT * sizeof(sprites[0]->quad->vertices), v_buff,
                 GL_STREAM_DRAW);

This works, but it’s creating a new data store, meaning that there was no point in the earlier glBufferData() call. Again, this doesn’t matter beyond inefficiency.

blubee;1281119:

and this is an example output after running this program.


v_buffer
      v0 v1 v2 v3
-------------------------------------
 x0   -50 -50  50  50
 y0   -50  50  50 -50
 z0     0   0   0   0

       v0 v1 v2 v3
-------------------------------------
 x1   -50 -50  50  50
 y1   -50  50  50 -50
 z1     0   0   0   0

Both quads have identical vertex coordinates. Even if it draws both, you’ll only see one because they’re both in the same place.

blubee;1281119:


c_buffer
      v0 v1 v2 v3
-------------------------------------
 x0     1   0   0   1
 y0     0   1   0   1
 z0     0   0   1   0
 z0     1   1   1   1

       v0 v1 v2 v3
-------------------------------------
 x1     1   0   0   1
 y1     0   1   0   1
 z1     0   0   1   0
 z1     1   1   1   1

Both quads have identical colours, so you can’t even tell which one you’re seeing.

blubee;1281119:


i_buffer
      v0 v1
-------------------------------------
 x0    0   0
 y0    1   2
 z0    2   3

       v0 v1
-------------------------------------
 x1    0   0
 y1    1   2
 z1    2   3

Not only do both quads have identical vertex attributes, but the second quad actually is the first quad, because it’s using the same indices. Even if you changed the second set of vertex coordinates and colours, you’d get two identical copies of the first quad.

sprites[i]->quad->indices probably shouldn’t even exist. I’d suggest:


        i_buff[(i * 6) + 0] = (i * 4) + 0;
        i_buff[(i * 6) + 1] = (i * 4) + 1;
        i_buff[(i * 6) + 2] = (i * 4) + 2;
 
        i_buff[(i * 6) + 3] = (i * 4) + 0;
        i_buff[(i * 6) + 4] = (i * 4) + 2;
        i_buff[(i * 6) + 5] = (i * 4) + 3;

[QUOTE=blubee;1281119]
the output shows that there’s 2 sets of vertex, color and index data. But opengl still only renders 1 quad and I don’t understand why.[/QUOTE]
I’m fairly sure that it is rendering both quads, but you can’t tell that because they’re both identical. Do your quads actually have different values in their scale, angl and pos fields?

blubee · January 12, 2016, 1:52pm

[QUOTE=GClements;1281125]v_buff isn’t null at this point. Similarly for the other buffers. But that doesn’t really matter; the data store is getting created, and copying zeros into it doesn’t have any negative effects besides wasting a small amount of time.

This works, but it’s creating a new data store, meaning that there was no point in the earlier glBufferData() call. Again, this doesn’t matter beyond inefficiency.

Both quads have identical vertex coordinates. Even if it draws both, you’ll only see one because they’re both in the same place.

Both quads have identical colours, so you can’t even tell which one you’re seeing.

Not only do both quads have identical vertex attributes, but the second quad actually is the first quad, because it’s using the same indices. Even if you changed the second set of vertex coordinates and colours, you’d get two identical copies of the first quad.

sprites[i]->quad->indices[] probably shouldn’t even exist. I’d suggest:


        i_buff[(i * 6) + 0] = (i * 4) + 0;
        i_buff[(i * 6) + 1] = (i * 4) + 1;
        i_buff[(i * 6) + 2] = (i * 4) + 2;
 
        i_buff[(i * 6) + 3] = (i * 4) + 0;
        i_buff[(i * 6) + 4] = (i * 4) + 2;
        i_buff[(i * 6) + 5] = (i * 4) + 3;

I’m fairly sure that it is rendering both quads, but you can’t tell that because they’re both identical. Do your quads actually have different values in their scale, angl and pos fields?[/QUOTE]

okay so adding that offset works for my vertex buffers, since the indices are all the same I can avoid adding them and see nothing really wrong. This works but I have two questions.


// Per-sprite pass: rebuild the model matrix, transform the quad's vertices on
// the CPU and pack positions, colours and indices into slot i of
// v_buff / c_buff / i_buff.
for (int i = 0; i < SPRITE_COUNT; i++) {
    vmathT3MakeIdentity(&rot);
    vmathT3MakeIdentity(&scal);
    vmathT3MakeIdentity(&trns);
    vmathT3MakeIdentity(&tmp);

    vmathT3MakeScale(&scal, &sprites[i]->scale);
    vmathT3MakeRotationZYX(&rot, &sprites[i]->angl);
    vmathT3MakeTranslation(&trns, &sprites[i]->pos);
    vmathT3Mul(&tmp, &trns, &scal);  // scale then translate
    vmathT3Mul(&tmp, &tmp, &rot);    // scale then translate then rotate

    vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);

    cg_quad_getquadverts(&i0[i], &i1[i], &i2[i], &i3[i], sprites[i]->quad);
    vmathM4MulV4(&o0[i], &sprites[i]->m_mat, &i0[i]);
    vmathM4MulV4(&o1[i], &sprites[i]->m_mat, &i1[i]);
    vmathM4MulV4(&o2[i], &sprites[i]->m_mat, &i2[i]);
    vmathM4MulV4(&o3[i], &sprites[i]->m_mat, &i3[i]);

    // NOTE(review): `i * 4 *` multiplies every coordinate by 4*i. It does not
    // offset anything: for i == 0 every stored coordinate becomes 0, and for
    // later sprites the positions are scaled by 4, 8, ...
    v_buff[(i * 12) + 0] = i * 4 * o0[i].x; //copy over vertex data
    v_buff[(i * 12) + 1] = i * 4 * o0[i].y;
    v_buff[(i * 12) + 2] = i * 4 * o0[i].z;

    v_buff[(i * 12) + 3] = i * 4 * o1[i].x;
    v_buff[(i * 12) + 4] = i * 4 * o1[i].y;
    v_buff[(i * 12) + 5] = i * 4 * o1[i].z;

    v_buff[(i * 12) + 6] = i * 4 * o2[i].x;
    v_buff[(i * 12) + 7] = i * 4 * o2[i].y;
    v_buff[(i * 12) + 8] = i * 4 * o2[i].z;

    v_buff[(i * 12) + 9]  = i * 4 * o3[i].x;
    v_buff[(i * 12) + 10] = i * 4 * o3[i].y;
    v_buff[(i * 12) + 11] = i * 4 * o3[i].z;

    // The quad's 16 colour floats, copied unchanged into slot i.
    c_buff[(i * 16) + 0] = sprites[i]->quad->colors[0]; //color
    c_buff[(i * 16) + 1] = sprites[i]->quad->colors[1];
    c_buff[(i * 16) + 2] = sprites[i]->quad->colors[2];
    c_buff[(i * 16) + 3] = sprites[i]->quad->colors[3];
    c_buff[(i * 16) + 4] = sprites[i]->quad->colors[4];
    c_buff[(i * 16) + 5] = sprites[i]->quad->colors[5];
    c_buff[(i * 16) + 6] = sprites[i]->quad->colors[6];
    c_buff[(i * 16) + 7] = sprites[i]->quad->colors[7];
    c_buff[(i * 16) + 8] = sprites[i]->quad->colors[8];
    c_buff[(i * 16) + 9] = sprites[i]->quad->colors[9];
    c_buff[(i * 16) + 10] = sprites[i]->quad->colors[10];
    c_buff[(i * 16) + 11] = sprites[i]->quad->colors[11];
    c_buff[(i * 16) + 12] = sprites[i]->quad->colors[12];
    c_buff[(i * 16) + 13] = sprites[i]->quad->colors[13];
    c_buff[(i * 16) + 14] = sprites[i]->quad->colors[14];
    c_buff[(i * 16) + 15] = sprites[i]->quad->colors[15];

    // NOTE(review): the quad's indices are copied without a per-sprite base,
    // so sprites whose quads store the same index values all reference the
    // same vertices.
    i_buff[(i * 6) + 0] = sprites[i]->quad->indices[0]; //indices
    i_buff[(i * 6) + 1] = sprites[i]->quad->indices[1];
    i_buff[(i * 6) + 2] = sprites[i]->quad->indices[2];

    i_buff[(i * 6) + 3] = sprites[i]->quad->indices[3];
    i_buff[(i * 6) + 4] = sprites[i]->quad->indices[4];
    i_buff[(i * 6) + 5] = sprites[i]->quad->indices[5];
}

1) It works, but I do not understand why I need to add the i * 4 offset to the vertex positions.
2) Something is off, because my objects are supposed to be lined up in a horizontal line but they are making a stair-step pattern going up; it seems like somewhere in this code my vertices are being shifted.

What could be causing the stair stepping effect and what exactly does i * 4 do?

GClements · January 12, 2016, 3:49pm

You don’t.

[QUOTE=blubee;1281129]
2) something is off because my objects are suppose to be lined up in a horizontal like but they are making a stair step pattern going up, seems like there’s someone in this code that my vertices are being shifted.

What could be causing the stair stepping effect and what exactly does i * 4 do?[/QUOTE]
Adding i*4 is causing the stair step.

blubee · January 13, 2016, 2:31am

[QUOTE=GClements;1281132]You don’t.

Adding i*4 is causing the stair step.[/QUOTE]

you were absolutely right, removing those i * 4 and only placing them on the indices buffer worked. That’s awesome but the question still remains. Why does that work?
For example I used the offset in the v_buff and c_buff so that they can properly align.

let’s say I am drawing two sprites.

SPRITE_COUNT = 2
int idx = 0;

for(int i = 0; i < SPRITE_COUNT; i++) {
v_buff[(i * 16) + idx++] = o0[i].x;  
v_buff[(i * 16) + idx++] = o0[i].y;
v_buff[(i * 16) + idx++] = o0[i].z;

v_buff[(i * 16) + idx++] = o1[i].x;
...
v_buff[(i * 16) + idx++] = o2[i].x;
...
v_buff[(i * 16) + idx++] = o3[i].x;
....
}

what that’s doing at least on the left side is placing the vertices in the v_buff in the proper order. On the first iteration they’ll be placed at:

v_buff[0]
v_buff[1]
v_buff[2]

on the second iteration they’ll be placed at:

v_buff[17]
v_buff[18]
v_buff[19]
.....

and so on.

It’s the same idea on the i_buff, on the left-hand side at least.
There are 6 indices so they need to be arranged in memory laid out in chunks of 6 from 0-5,6-11, etc…

with the index order
0,1,2
0,2,3

first loop i = 0;
(i * 4) + 0 = 0;
(i * 4) + 1 = 1;
(i * 4) + 2 = 2;

(i * 4) + 0 = 0;
(i * 4) + 2 = 2;
(i * 4) + 3 = 3;

second loop i = 1;
(i * 4) + 0 = 4;
(i * 4) + 1 = 5;
(i * 4) + 2 = 6;

(i * 4) + 0 = 4;
(i * 4) + 2 = 6;
(i * 4) + 3 = 7;

third loop i = 2
(i * 4) + 0 = 8;
(i * 4) + 1 = 9;
(i * 4) + 2 = 10;

(i * 4) + 0 = 8;
(i * 4) + 2 = 10;
(i * 4) + 3 = 11;

I am not really seeing what’s going on, can you help expand on that a bit further?

GClements · January 13, 2016, 6:12am

Either “idx=0” should go inside the loop, or you shouldn’t use “(i * 16)” in the index calculation. As it stands, at the start of the second iteration, i will be 1 and idx will be 16, meaning that you’ll start at offset 32. Also, there are only 12 floats for the vertices, not 16.

[QUOTE=blubee;1281137]
It’s the same idea on the i_buff, on the left hand size at least.
There are 6 indices so they need to be arranged in memory laid out in chunks of 6 from 0-5,6-11, etc…


first loop i = 0;
(i * 4) + 0 = 0;
(i * 4) + 1 = 1;
(i * 4) + 2 = 2;
 
(i * 4) + 0 = 0;
(i * 4) + 2 = 2;
(i * 4) + 3 = 3;

I am not really seeing what’s going on, can you help expand on that a bit further?[/QUOTE]
Each quad is made up of two triangles with three vertices each, so six vertices in total. But each quad only has four distinct vertices; two of the vertices are used in both triangles (the two vertices forming the shared diagonal edge).

So the index buffer is set up so that the first pair of triangles uses the vertices 0 through 3, the second pair uses vertices 4 through 7, and so on.


0 o---o 1   4 o---o 5
  |\  |       |\  |
  | \ |       | \ |
  |  \|       |  \|
3 o---o 2   7 o---o 6

blubee · January 14, 2016, 3:43am

actually my vertices are arranged like this:

0,1,2,0,2,3 
but they are packed like this 
-0.5, -0.5, 0, 
-0.5, 0.5, 0,
0.5, 0.5, 0, 
0.5, -0.5, 0

I still don’t quite understand the indices order thing. I will just keep on working at it.

This is the current working setup, I wrote a function to sort my cg_sprites i’ll add that at the bottom.

init code

    v_buff =
        (float*)calloc(1, (sizeof(float) * cg_sprite_get_sizeof_vert() * sc));
    c_buff =
        (float*)calloc(1, (sizeof(float) * cg_sprite_get_sizeof_col() * sc));
    t_buff = (float*)calloc(
        1, (sizeof(float) * cg_sprite_get_sizeof_tex_coord() * sc));
    i_buff =
        (short*)calloc(1, (sizeof(short) * cg_sprite_get_sizeof_ind() * sc));

    glGenBuffers(1, &vao);
    glBindVertexArray(vao);

    glEnableVertexAttribArray(0);
    glGenBuffers(1, &vert_buff);
    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    glBufferData(GL_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_vert(), v_buff,
                 GL_STREAM_DRAW);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat),
                          (GLvoid*)0);

    glEnableVertexAttribArray(1);
    glGenBuffers(1, &col_buff);
    glBindBuffer(GL_ARRAY_BUFFER, col_buff);
    glBufferData(GL_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_col(), c_buff,
                 GL_STREAM_DRAW);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat),
                          (GLvoid*)0);

    glEnableVertexAttribArray(2);
    glGenBuffers(1, &tex_buff);
    glBindBuffer(GL_ARRAY_BUFFER, tex_buff);
    glBufferData(GL_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_tex_coord(), t_buff,
                 GL_STREAM_DRAW);
    glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat),
                          (GLvoid*)0);

    glGenBuffers(1, &ind_buff);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_ind(),
                 i_buff, GL_STREAM_DRAW);

    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

this is the rendering loop

void variable_render(double alpha) {
    glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    glEnable(GL_BLEND);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_LEQUAL);
    glClearDepth(1.0f);

    glUseProgram(ce_get_default_shader()->shader_program);
    glBindVertexArray(vao);

    cg_sprite_mbubble_sort(sprites, sc);
    for (int i = 0; i < sc; i++) {
        //--------------- start update vertex data ---------------------

        sp = sprites[i];
        vmathT3MakeIdentity(&rot);
        vmathT3MakeIdentity(&scal);
        vmathT3MakeIdentity(&trns);
        vmathT3MakeIdentity(&tmp);

        vmathT3MakeScale(&scal, &sp->scale);
        vmathT3MakeRotationZYX(&rot, &sp->angl);
        vmathT3MakeTranslation(&trns, &sp->pos);
        vmathT3Mul(&tmp, &trns, &scal);  // scale then trnslate
        vmathT3Mul(&tmp, &tmp, &rot);    // scale then translate then rotate

        vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);

        cg_quad_getquadverts(&iv0, &iv1, &iv2, &iv3, sprites[i]->quad);
        vmathM4MulV4(&ov0, &sp->m_mat, &iv0);
        vmathM4MulV4(&ov1, &sp->m_mat, &iv1);
        vmathM4MulV4(&ov2, &sp->m_mat, &iv2);
        vmathM4MulV4(&ov3, &sp->m_mat, &iv3);
        //--------------- finish update vertex data ---------------------

        //--------------- start packing data into buffers---------------------
        idx = 0;
        v_buff[(i * 12) + idx++] = ov0.x;
        v_buff[(i * 12) + idx++] = ov0.y;
        v_buff[(i * 12) + idx++] = ov0.z;

        v_buff[(i * 12) + idx++] = ov1.x;
        v_buff[(i * 12) + idx++] = ov1.y;
        v_buff[(i * 12) + idx++] = ov1.z;

        v_buff[(i * 12) + idx++] = ov2.x;
        v_buff[(i * 12) + idx++] = ov2.y;
        v_buff[(i * 12) + idx++] = ov2.z;

        v_buff[(i * 12) + idx++] = ov3.x;
        v_buff[(i * 12) + idx++] = ov3.y;
        v_buff[(i * 12) + idx++] = ov3.z;
        idx = 0;

        c_buff[(i * 16) + idx++] = sp->quad->colors[0];
        c_buff[(i * 16) + idx++] = sp->quad->colors[1];
        c_buff[(i * 16) + idx++] = sp->quad->colors[2];
        c_buff[(i * 16) + idx++] = sp->quad->colors[3];
        c_buff[(i * 16) + idx++] = sp->quad->colors[4];
        c_buff[(i * 16) + idx++] = sp->quad->colors[5];
        c_buff[(i * 16) + idx++] = sp->quad->colors[6];
        c_buff[(i * 16) + idx++] = sp->quad->colors[7];
        c_buff[(i * 16) + idx++] = sp->quad->colors[8];
        c_buff[(i * 16) + idx++] = sp->quad->colors[9];
        c_buff[(i * 16) + idx++] = sp->quad->colors[10];
        c_buff[(i * 16) + idx++] = sp->quad->colors[11];
        c_buff[(i * 16) + idx++] = sp->quad->colors[12];
        c_buff[(i * 16) + idx++] = sp->quad->colors[13];
        c_buff[(i * 16) + idx++] = sp->quad->colors[14];
        c_buff[(i * 16) + idx++] = sp->quad->colors[15];
        idx = 0;

        i_buff[(i * 6) + idx++] = i * 4 + sp->quad->indices[0];
        i_buff[(i * 6) + idx++] = i * 4 + sp->quad->indices[1];
        i_buff[(i * 6) + idx++] = i * 4 + sp->quad->indices[2];

        i_buff[(i * 6) + idx++] = i * 4 + sp->quad->indices[3];
        i_buff[(i * 6) + idx++] = i * 4 + sp->quad->indices[4];
        i_buff[(i * 6) + idx++] = i * 4 + sp->quad->indices[5];
        idx = 0;

        t_buff[(i * 4) + idx++] = sp->quad->tex_coords[0];
        t_buff[(i * 4) + idx++] = sp->quad->tex_coords[1];
        t_buff[(i * 4) + idx++] = sp->quad->tex_coords[2];
        t_buff[(i * 4) + idx++] = sp->quad->tex_coords[3];
        idx = 0;
        //--------------- finish packing data into buffers---------------------
    }

    vmathM4Mul(&mvp_mat, &p_mat, &v_mat);

    // projection * view * model * vertex_pos;
    glUniformMatrix4fv(model_mat_loc, 1, GL_FALSE, vmathM4GetData(&sp->m_mat));
    glUniformMatrix4fv(view_mat_loc, 1, GL_FALSE, vmathM4GetData(&v_mat));
    glUniformMatrix4fv(proj_mat_loc, 1, GL_FALSE, vmathM4GetData(&p_mat));
    glUniformMatrix4fv(mvp_matrix_loc, 1, GL_FALSE, vmathM4GetData(&mvp_mat));

    glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
    glBufferData(GL_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_vert(), v_buff,
                 GL_STREAM_DRAW);

    glBindBuffer(GL_ARRAY_BUFFER, col_buff);
    glBufferData(GL_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_col(), c_buff,
                 GL_STREAM_DRAW);

    glBindBuffer(GL_ARRAY_BUFFER, tex_buff);
    glBufferData(GL_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_tex_coord(), t_buff,
                 GL_STREAM_DRAW);

    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_ind(),
                 i_buff, GL_STREAM_DRAW);

    glDrawElements(GL_TRIANGLES, sc * cg_sprite_get_vert_count(),
                   GL_UNSIGNED_SHORT, 0);

    glBindVertexArray(0);
}

this is the way that I sort those cg_sprites:

/* Exchanges the two sprite pointers that `a` and `b` refer to. */
static void cg_sprite_swap_func(cg_sprite** a, cg_sprite** b) {
    cg_sprite* const first = *a;
    cg_sprite* const second = *b;

    *a = second;
    *b = first;
}

/*
 * Sorts the array of sprite pointers `a` in place into ascending z_index
 * order using bubble sort.
 *
 * a:     array of `count` sprite pointers; a null array is ignored.
 * count: number of entries in `a`; fewer than two entries is a no-op.
 *
 * Only neighbours that are strictly out of order are swapped, so sprites
 * with equal z_index keep their relative order. The sort stops as soon as a
 * full pass makes no swap.
 */
void cg_sprite_mbubble_sort(cg_sprite** a, int count) {
    if (!a || count < 2) {
        return;
    }

    for (int pass = 0; pass < count - 1; pass++) {
        int swapped = 0;

        /* After `pass` passes the last `pass` entries are already in place. */
        for (int j = 0; j < count - pass - 1; j++) {
            if (a[j]->z_index > a[j + 1]->z_index) {
                cg_sprite_swap_func(&a[j], &a[j + 1]);
                swapped = 1;
            }
        }

        if (!swapped) {
            break;
        }
    }
}

with this I can render about 6000 cg_sprites in 1 batch and my machine still doesn’t skip a beat so that’s pretty good. The next step would be to create 1 large interleaved buffer to hold all this data instead of 3 or 4 smaller ones.

Now that this is working properly, I can generalize the code to work with other things such as lines or points, since their data will be organized differently. I have to thank you for your help though, I don’t think that I would’ve figured out that offset thing for the indices so fast without your guidance.