Take a look at your last loop:
// Double-buffered PBO upload loop: every iteration maps/unmaps pbo[ 0 ], uploads
// all sources from pbo[ 1 ] into their textures, then swaps the two buffers.
for( int i=0; i<no_runs; i++ )
{
// bind the first PBO and simulate filling it (map, then immediately unmap)
::glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo[ 0 ] );
// NOTE(review): GL_WRITE is not one of the standard glMapBuffer access values;
// GL_WRITE_ONLY is presumably what is meant -- confirm
void* p_mem = ::glMapBuffer( GL_PIXEL_UNPACK_BUFFER, GL_WRITE );
::glUnmapBuffer( GL_PIXEL_UNPACK_BUFFER );
// now bind the full buffer and upload its content to the textures
::glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo[ 1 ] );
for( int j=0; j<no_sources; j++ )
{
::glBindTexture( GL_TEXTURE_2D, tex[ j ] );
// while a pixel-unpack buffer is bound the last argument is a byte offset into
// that buffer, so source j is read starting at image_size*j
::glTexSubImage2D( GL_TEXTURE_2D, 0, 0, 0, xres, yres, GL_LUMINANCE, GL_UNSIGNED_BYTE, (void*)(image_size*j) );
}
// the buffer that just fed the uploads becomes pbo[ 0 ], i.e. the one that is
// mapped at the top of the next iteration
std::swap( pbo[ 0 ], pbo[ 1 ] );
}
The glTexSubImage2D call in this case is non-blocking: the app gets control back as soon as the driver posts the command to its queue. But the pbo[1] object stays busy until all pending glTexSubImage2D operations have finished. In the next loop iteration you try to map that same buffer and fill it with new data, so the driver has to wait…
Here is my async uploader code… I didn't benchmark it…
#pragma once
#include <vector>
#include "glextmap.h" // this is my gl ext loader... use glew if you want
#ifndef IN
#define IN
#endif
#ifndef OUT
#define OUT
#endif
// State of a single upload PBO as tracked by CPBO and CUploadPBOPool.
enum eUPState
{
UP_NONE = 0, // default
UP_MAPPED, // mapped, accessible from other threads
UP_LOCKED, // locked in some thread
UP_FULL, // full
UP_UNMAPPED, // unmapped, render thread copies its content to texture
UP_PENDING // pending... map it in next frame
};
class CUploadPBOPool;
// Wrapper around one OpenGL pixel-unpack buffer object plus the bookkeeping
// (mapped pointer, sizes, state, user tag) that CUploadPBOPool keeps for it.
class CPBO
{
public:
CPBO();
virtual ~CPBO();
// create the GL buffer; size = size of single PBO
bool Init(int size);
// Cleanup: unmaps the buffer if it is still mapped and deletes the GL buffer
bool Done();
// get state of PBO
eUPState GetState();
// map and unmap PBO (both bind the PBO first)
bool Map();
bool Unmap();
// mark the PBO as locked and get pointer to mapped memory
unsigned char* Lock();
// store the number of bytes used and the caller's userID, and mark the PBO as full
bool Unlock(int used, void* userID);
// get size of PBO
unsigned int GetMaxSize();
// Bind the PBO
void Bind();
protected:
// the pool reads data/usedsize/userID and writes state directly
friend class CUploadPBOPool;
// GL buffer object name
GLuint id;
// pointer returned by glMapBuffer; NULL while the buffer is not mapped
unsigned char* data;
// size passed to Init
int maxsize;
// size passed to the last Unlock
int usedsize;
// current state, see eUPState
eUPState state;
// opaque tag passed to the last Unlock
void* userID;
};
// Pool of upload PBOs: other threads fill mapped buffers through Lock/Unlock,
// the render thread hands filled buffers to ProcessPBOData in UpdateUploadPBOPool.
class CUploadPBOPool
{
public:
CUploadPBOPool(void);
virtual ~CUploadPBOPool(void);
// create pool with num_buffers PBOs, each max_size bytes large
bool CreateUploadPBOPool(IN int num_buffers, IN unsigned int max_size);
// destroy PBO pool
bool DeleteUploadPBOPool();
// Lock first available PBO.. pass pointer to resulting pointer and pointer to size of locked buffer
// returns index of locked PBO, or -1 if no PBO is currently in the mapped state
// this method should be called from another thread
int Lock(OUT unsigned char** data, OUT unsigned int* pSize);
// unlock previously locked PBO.. pass return value from Lock call
// this method should be called from another thread
// use userID to identify your buffer later in ProcessPBOData
// returns false if index is out of range
bool Unlock(IN int index, IN void* userID, IN int used);
// Update... call once per frame from render thread
bool UpdateUploadPBOPool();
protected:
// taken around every access to m_PBOs in Lock, Unlock, Update and Delete
CCritSec m_LockPBOs;
std::vector<CPBO> m_PBOs;
// override this to upload data to texture. use userID to find where those data belongs in your app
// NOTE(review): UpdateUploadPBOPool calls this right after CPBO::Unmap(), which resets the
// data pointer to NULL and leaves the PBO bound, so `data` looks like it is meant as an
// offset into the bound PBO rather than a CPU pointer -- confirm
virtual void ProcessPBOData(unsigned char* data, unsigned int datasize, void* userID) {};
};
#include "StdAfx.h"
#include "UploadPBOPool.h"
#include "Log.h"
// GLCALL(a): issue one OpenGL call. In _DEBUG builds the call is followed by
// glerr(), which logs the call text and line if GL reports an error.
// The debug form is wrapped in do { } while (0) so that `GLCALL(x);` behaves as
// a single statement everywhere (e.g. as the body of an if with an else branch).
#ifdef _DEBUG
#define GLCALL(a) do { (a); glerr(#a, __LINE__); } while (0)
#else
#define GLCALL(a) (a)
#endif
// Logs the pending OpenGL error, if there is one.
//   str  - text of the GL call that was just issued (GLCALL passes the stringified call)
//   line - source line of that call
// str is const char* because it receives string literals.
static void glerr(const char *str, int line)
{
    const int err = glGetError();
    if (err != 0)
    {
        Log.AddLine(__FILE__, "GLERR: [%5d] %s: %s", line, str, gluErrorString(err));
    }
}
// Starts out empty: no GL buffer, no mapping, nothing recorded, state UP_NONE.
CPBO::CPBO()
    : id(0)
    , data(NULL)
    , maxsize(0)
    , usedsize(0)
    , state(UP_NONE)
    , userID(NULL)
{
}
// The destructor releases nothing; the GL buffer is deleted by Done().
CPBO::~CPBO()
{
}
bool CPBO::Init( int size )
{
GLCALL(glGenBuffers(1, &id));
GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, id));
GLCALL(glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, size, NULL, GL_STATIC_DRAW));
//Log.Info("CPBO::Init %d", id);
maxsize = size;
usedsize = 0;
return true;
}
// Binds this PBO to the GL_PIXEL_UNPACK_BUFFER_ARB target.
void CPBO::Bind()
{
GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, id));
//Log.Info("CPBO::Bind %d", id);
}
bool CPBO::Done()
{
if (data != NULL)
{
GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, id));
GLCALL(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB));
data = NULL;
}
glDeleteBuffers(1, &id);
state = UP_NONE;
return true;
}
// Returns the current state of this PBO.
eUPState CPBO::GetState()
{
return state;
}
bool CPBO::Map()
{
GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, id));
data = (unsigned char*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
//Log.Info("CPBO::Map() %d", id);
state = UP_MAPPED;
return true;
}
// Binds the PBO, unmaps it, clears the mapped pointer and switches the state to
// UP_UNMAPPED. The PBO stays bound afterwards. Always returns true.
bool CPBO::Unmap()
{
    GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, id));
    GLCALL(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB));

    // the mapping is gone, so the old pointer must not be handed out any more
    state = UP_UNMAPPED;
    data = NULL;

    return true;
}
// Marks the PBO as locked and returns the pointer stored by the last Map().
// The current state is not checked here; CUploadPBOPool::Lock only calls this
// for PBOs in the UP_MAPPED state.
unsigned char* CPBO::Lock()
{
state = UP_LOCKED;
//Log.Info("CPBO::Lock() %d", id);
return data;
}
// Records what the caller wrote into the buffer and marks the PBO as full.
//   used - number of bytes used
//   UID  - opaque tag stored as userID
// Always returns true.
bool CPBO::Unlock( int used, void* UID )
{
    userID = UID;
    usedsize = used;

    // switch the state last, after the payload description is in place
    state = UP_FULL;
    return true;
}
// Returns the size that was passed to Init(), converted to unsigned.
unsigned int CPBO::GetMaxSize()
{
    return static_cast<unsigned int>(maxsize);
}
//////////////////////////////////////////////////////////////////////////
//
//////////////////////////////////////////////////////////////////////////
// Creates an empty pool; PBOs are added by CreateUploadPBOPool().
CUploadPBOPool::CUploadPBOPool(void)
{
}
// Does not release any GL buffers; that is done by DeleteUploadPBOPool().
CUploadPBOPool::~CUploadPBOPool(void)
{
}
// Adds num_buffers PBOs of max_size bytes each to the pool, initialises and maps
// them so they can be handed out by Lock().
//   num_buffers - number of PBOs to add; nothing is done if it is not positive
//   max_size    - size of each PBO in bytes
// Only the PBOs added by this call are initialised, so calling this on a pool
// that already holds buffers does not re-create (and leak) the existing ones.
// The pool lock is held while the vector is modified, and the pixel-unpack
// binding is reset to 0 at the end, as UpdateUploadPBOPool and
// DeleteUploadPBOPool do.
// Returns false if any new PBO failed to initialise or map, true otherwise.
bool CUploadPBOPool::CreateUploadPBOPool( IN int num_buffers, IN unsigned int max_size )
{
    if (num_buffers <= 0)
    {
        return true;
    }

    bool all_ok = true;

    m_LockPBOs.Lock();
    const int first_new = static_cast<int>(m_PBOs.size());
    m_PBOs.resize(first_new + num_buffers);
    for (int i = first_new; i < static_cast<int>(m_PBOs.size()); i++)
    {
        CPBO& pbo = m_PBOs[i];
        if (!pbo.Init(static_cast<int>(max_size)))
        {
            all_ok = false;
        }
        if (!pbo.Map())
        {
            all_ok = false;
        }
    }
    // Init/Map leave the last PBO bound; unbind so later texture uploads of the
    // application are not redirected to it
    GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
    m_LockPBOs.Unlock();

    return all_ok;
}
// Hands out the first PBO that is currently in the UP_MAPPED state.
//   data  - receives the pointer to the mapped memory
//   pSize - receives the size of that PBO
// Returns the index of the locked PBO, or -1 if none is available (data and
// pSize are left untouched in that case).
int CUploadPBOPool::Lock(OUT unsigned char** data, OUT unsigned int* pSize )
{
    int found = -1;

    m_LockPBOs.Lock();
    const int count = (int)m_PBOs.size();
    for (int slot = 0; slot < count && found < 0; ++slot)
    {
        CPBO& candidate = m_PBOs[slot];
        if (candidate.GetState() != UP_MAPPED)
        {
            continue;
        }
        *data = candidate.Lock();
        *pSize = candidate.GetMaxSize();
        found = slot;
    }
    m_LockPBOs.Unlock();

    return found;
}
// Gives a PBO obtained from Lock() back to the pool and marks it as full.
//   index  - value returned by Lock()
//   userID - opaque tag, passed on to ProcessPBOData
//   used   - number of bytes written into the buffer
// The upper bound of index is checked while holding the pool lock, so the size
// of the pool cannot change between the check and the access. The PBO is only
// accepted if it is in the UP_LOCKED state; a stray or repeated Unlock would
// otherwise push a buffer that nobody filled into the upload path.
// Returns true if the PBO was marked as full, false otherwise.
bool CUploadPBOPool::Unlock( IN int index, IN void* userID, IN int used )
{
    if (index < 0)
    {
        return false;
    }

    bool accepted = false;

    m_LockPBOs.Lock();
    if (index < (int)m_PBOs.size())
    {
        CPBO& pbo = m_PBOs[index];
        if (pbo.GetState() == UP_LOCKED)
        {
            pbo.Unlock(used, userID);
            accepted = true;
        }
    }
    m_LockPBOs.Unlock();

    return accepted;
}
// Advances every PBO of the pool by one step of its state machine:
//   UP_FULL     -> unmap, hand over to ProcessPBOData, then UP_PENDING
//   UP_UNMAPPED -> UP_PENDING
//   UP_PENDING  -> Map() again
//   UP_NONE     -> logged as an invalid state
// UP_MAPPED and UP_LOCKED are left untouched (no case for them).
// Finally the pixel-unpack binding is reset to 0. Always returns true.
bool CUploadPBOPool::UpdateUploadPBOPool()
{
m_LockPBOs.Lock();
for (int i=0; i<(int)m_PBOs.size(); i++)
{
CPBO& pbo = m_PBOs[i];
eUPState state = pbo.GetState();
//Log.Info("pbo %d st = %d", i, state);
switch (state)
{
case UP_NONE:
Log.Error("CUploadPBOPool::Update invalid state %d", i);
break;
case UP_FULL:
//Log.Info("case UP_FULL");
// Unmap() binds the PBO, sets pbo.data to NULL and the state to UP_UNMAPPED
pbo.Unmap();
// the pool lock is released for the duration of the callback and re-acquired afterwards
m_LockPBOs.Unlock();
// pbo.data is NULL at this point (reset by Unmap above)
// NOTE(review): with the PBO still bound this presumably acts as offset 0 into it -- confirm
ProcessPBOData(pbo.data, pbo.usedsize, pbo.userID);
m_LockPBOs.Lock();
pbo.state = UP_PENDING;
break;
case UP_UNMAPPED:
pbo.state = UP_PENDING;
break;
case UP_PENDING:
// a PBO set to UP_PENDING in one call is mapped again in a later call
pbo.Map();
break;
}
}
GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
m_LockPBOs.Unlock();
return true;
}
bool CUploadPBOPool::DeleteUploadPBOPool()
{
m_LockPBOs.Lock();
for (int i=0; i<(int)m_PBOs.size(); i++)
{
CPBO& pbo = m_PBOs[i];
pbo.Done();
}
m_PBOs.clear();
m_LockPBOs.Unlock();
GLCALL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
return true;
}
Usage:
- derive your render class from CUploadPBOPool
- implement:
virtual void ProcessPBOData(unsigned char* data, unsigned int datasize, void* userID) {};
example:
void FXEngine::ProcessPBOData( unsigned char* data, unsigned int datasize, void* userID )
{
CVideoTexture* vtex = (CVideoTexture*)userID;
vtex->UpdateTexture(data, datasize);
}
- in some thread…
// already initialised stuff...
CVideoTexture* vtex;
unsigned char* img_data;
unsigned int img_size;
// temporary vars
unsigned char* pboptr;
unsigned int pbolen;
void* userID = (void*)vtex;
// lock one of the buffers (-1 means no buffer is available right now)
int index = renderer->Lock(&pboptr, &pbolen);
if (index != -1)
{
    // the image must fit into the buffer, and the buffer must really be mapped
    if ((pboptr != NULL) && (pbolen >= img_size))
    {
        // copy content to buffer
        memcpy(pboptr, img_data, img_size); // or use some faster memcpy code...
        // unlock buffer
        renderer->Unlock(index, userID, img_size);
    }
    else
    {
        // buffer is unusable for this image: give it back with used = 0 so it is
        // not left locked forever (ProcessPBOData has to ignore datasize == 0)
        renderer->Unlock(index, userID, 0);
    }
}