I am at the end…
I need to be able to render a 1920x1080 HD video window on a second monitor and two 960x540 video windows on the first monitor, all with overlays. Each window has an independent video update rate of 30 Hz. The performance problem appears when I add the overlays (each independently updating at 10 Hz). I’m using a double-buffered PBO scheme with glMapBufferRange() to allow for asynchronous updates of video and overlay frames. I suspect that the bottleneck has to do with the blending and/or rendering of the ABGR overlay, but I am hoping someone can point out obvious things that I am doing wrong. I’ll attach code… Note: this will likely not compile, but it should be enough to give someone a good idea of what I could be doing wrong or could do better. Thanks!
Having trouble adding the code, so I’ll try to do it in sections:
/**
 * Thin wrapper around a pthread mutex.
 *
 * Lock() blocks until the mutex is acquired; subclasses override Lock() to
 * provide a different acquisition strategy while sharing the same
 * underlying pthread_mutex_t.
 */
class Mutex {
public:
    /** Initializes the underlying mutex with default attributes. */
    Mutex() {
        pthread_mutex_init(&mMutex, NULL);
    }

    /** Destroys the underlying mutex. */
    virtual ~Mutex() {
        pthread_mutex_destroy(&mMutex);
    }

    /**
     * Acquires the mutex, blocking the caller until it is available.
     * @return true when pthread_mutex_lock() reported success
     */
    virtual bool Lock() {
        const int tStatus = pthread_mutex_lock(&mMutex);
        return tStatus == 0;
    }

    /** Releases the mutex; the pthread return code is not inspected. */
    virtual void Unlock() {
        pthread_mutex_unlock(&mMutex);
    }

protected:
    pthread_mutex_t mMutex;
};
/**
 * Mutex variant whose Lock() never blocks: it makes a single
 * pthread_mutex_trylock() attempt and reports whether that attempt succeeded.
 */
class TryMutex : public Mutex {
public:
    TryMutex() {
    }

    virtual ~TryMutex() {
    }

    /**
     * Attempts to acquire the mutex without waiting.
     * @return true when the mutex was acquired, false otherwise
     */
    virtual bool Lock() {
        const int tStatus = pthread_mutex_trylock(&mMutex);
        return tStatus == 0;
    }
};
/**
 * Mutex variant whose Lock() gives up after a fixed wait
 * (aWaitSecs seconds plus aWaitMicrosSecs microseconds).
 */
class TimedMutex : public Mutex {
public:
    /**
     * @param aWaitSecs       whole seconds to wait for the lock
     * @param aWaitMicrosSecs additional microseconds to wait for the lock
     */
    TimedMutex(uint32_t aWaitSecs, uint64_t aWaitMicrosSecs) :
        mWaitSeconds(aWaitSecs), mWaitMicroSeconds(aWaitMicrosSecs) { }

    virtual ~TimedMutex() { }

    /**
     * Waits for the mutex until the configured timeout expires.
     * @return true when the mutex was acquired; false on timeout or error
     */
    virtual bool Lock() {
        struct timespec tTimeout;
        if (0 != clock_gettime(CLOCK_REALTIME, &tTimeout)) {
            // Without a valid "now" no absolute deadline can be built.
            return false;
        }

        const uint64_t kMicrosPerSecond = 1000000ULL;
        const uint64_t kNanosPerSecond = 1000000000ULL;

        // pthread_mutex_timedlock() rejects a deadline whose tv_nsec is not in
        // [0, 1e9), so the nanosecond sum must be carried over into tv_sec.
        const uint64_t tNanos = (uint64_t)tTimeout.tv_nsec
                              + (mWaitMicroSeconds % kMicrosPerSecond) * 1000ULL;
        tTimeout.tv_sec += (time_t)(mWaitSeconds
                                    + mWaitMicroSeconds / kMicrosPerSecond
                                    + tNanos / kNanosPerSecond);
        tTimeout.tv_nsec = (long)(tNanos % kNanosPerSecond);

        return (0 == pthread_mutex_timedlock(&mMutex, &tTimeout));
    }

private:
    uint32_t mWaitSeconds;
    uint64_t mWaitMicroSeconds;
};
class GLBufferObject {
public:
GLBufferObject(Mutex *aMutexImpl, unsigned int aBytesPerPixel = 3) {
mUpdated(false),
mSizeChanged(false),
mSize(0),
mWidth(0),
mHeight(0),
mCurBuffer(0),
mNumBuffered(0),
mBytesPerPixel(aBytesPerPixel),
mPixelMap(NULL),
mFormat((3 == aBytesPerPixel) ? GL_RGB : GL_ABGR_EXT),
mInternalFormat((3 == aBytesPerPixel) ? GL_RGB : GL_RGBA),
mTextureId(0),
mMutex(aMutexImpl) {
}
virtual ~GLBufferObject() {
if (mMutex) {
delete mMutex;
}
}
void Destroy() {
if (mMutex->Lock()) {
if (mPixelMap) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mBufferId[mCurBuffer]);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
mMutex->Unlock();
}
glDeleteBuffers(2, mBufferId);
glDeleteTextures(1, &mTextureId);
}
void Initialize() {
glGenTextures(1, &mTextureId);
glBindTexture(GL_TEXTURE_2D, mTextureId);
glTexImage2D(GL_TEXTURE_2D, 0, mInternalFormat, GLCANVAS_MAX_WIDTH, GLCANVAS_MAX_HEIGHT, 0, mFormat, GL_UNSIGNED_BYTE, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, 0);
glGenBuffers(2, mBufferId);
for (int i=0; i<2; ++i) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mBufferId[i]);
glBufferData(GL_PIXEL_UNPACK_BUFFER, (GLCANVAS_MAX_WIDTH * GLCANVAS_MAX_HEIGHT * mBytesPerPixel), NULL, GL_DYNAMIC_COPY);
if (0 == i) {
mPixelMap = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, (GLCANVAS_MAX_WIDTH * GLCANVAS_MAX_HEIGHT * mBytesPerPixel), MAP_BUFFER_OPTIONS);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
}
bool Swap() {
bool tResult = false;
if (mMutex->Lock()) {
tResult = mUpdated;
if (mUpdated) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mBufferId[mCurBuffer]);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
mUpdated = false;
mPixelMap = NULL;
mCurBuffer = !mCurBuffer;
++mNumBuffered;
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mBufferId[mCurBuffer]);
if (mNumBuffered > 1) {
--mNumBuffered;
glBindTexture(GL_TEXTURE_2D, mTextureId);
if (!mSizeChanged) {
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, mFormat, GL_UNSIGNED_BYTE, 0);
} else {
printf("GLCanvas::GLBufferObject::Swap() - Input size changed %ux%u
", mWidth, mHeight);
mSizeChanged = false;
glTexImage2D(GL_TEXTURE_2D, 0, mInternalFormat, mWidth, mHeight, 0, mFormat, GL_UNSIGNED_BYTE, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
glBindTexture(GL_TEXTURE_2D, 0);
}
// Use the "orphaning" technique to help prevent overwrite/tearing
glBufferData(GL_PIXEL_UNPACK_BUFFER, (GLCANVAS_MAX_WIDTH * GLCANVAS_MAX_HEIGHT * mBytesPerPixel), NULL, GL_DYNAMIC_COPY);
mPixelMap = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, mSize, MAP_BUFFER_OPTIONS);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
mMutex->Unlock();
} else {
printf("GLCanvas::GLBufferObject::Swap() - Failed to acquire lock for update
");
}
return tResult;
}
bool Update(const void *aPixels, uint32_t aWidth, uint32_t aHeight) {
bool tResult = false;
if (mMutex->Lock()) {
if (mPixelMap) {
mSize = aWidth * aHeight * mBytesPerPixel;
memcpy(mPixelMap, aPixels, mSize);
if (mHeight != aHeight || mWidth != aWidth) {
mSizeChanged = true;
}
mHeight = aHeight;
mWidth = aWidth;
mUpdated = true;
tResult = true;
} else {
printf("GLCanvas::GLBufferObject::Update() pixels are not mapped!
");
}
mMutex->Unlock();
}
return tResult;
}
bool mUpdated;
bool mSizeChanged;
unsigned int mSize;
unsigned int mWidth;
unsigned int mHeight;
unsigned int mCurBuffer;
unsigned int mNumBuffered;
unsigned int mBytesPerPixel;
GLvoid *mPixelMap;
GLint mFormat;
GLint mInternalFormat;
GLuint mTextureId;
GLuint mBufferId[2];
Mutex *mMutex;
};
// Largest frame dimensions supported; GLBufferObject sizes its pixel buffers
// and its initial texture storage from these values.
#define GLCANVAS_MAX_HEIGHT 1080
#define GLCANVAS_MAX_WIDTH 1920
// Access flags GLBufferObject passes to glMapBufferRange(): write access,
// the mapped range's previous contents may be discarded, and GL does not
// wait for pending operations on the buffer before returning the pointer.
#define MAP_BUFFER_OPTIONS (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)
#include <GL/glx.h>
class GLCanvas {
public:
/**
*
*/
GLCanvas() {
mRendering(false),
mInitialized(false),
mCanvasSizeChanged(true),
mContext(NULL),
mInputBuffer(new Mutex()),
mOverlayBuffer(new TryMutex(), 4) {
pthread_mutex_init(&mWaitMutex, NULL);
pthread_cond_init(&mWaitCondition, NULL);
}
/**
*
*/
virtual ~GLCanvas() {
pthread_cond_destroy(&mWaitCondition);
pthread_mutex_destroy(&mWaitMutex);
}
/**
* Sets the Display and Drawable parameters for the associated window
* @param aDrawable
*/
virtual void setDrawable(Display aDisplay, Drawable aDrawable) {
if (0 == pthread_mutex_lock(&sMutex)) {
mDisplay = aDisplay;
mContext = createScreenCorrectContext(aDrawable);
if (mContext) {
mDrawable = aDrawable;
if (glXIsDirect(mDisplay, mContext)) {
printf("Direct rendering supported
");
mRendering = true;
mRenderThread.start();
} else {
printf("GLCanvas::setDrawable() - Direct rendering unsupported
");
}
} else {
printf("GLCanvas::setDrawable() - Failed to create GL context!
");
}
pthread_mutex_unlock(&sMutex);
}
}
/**
* Update the video frame pixels - expected to be in RGB
* @param aPixels
* @param aWidth
* @param aHeight
*/
virtual void setFrame(const void *aPixels, int aWidth, int aHeight) {
if (!mInputBuffer.Update(aData, tDesc.getWidth(), tDesc.getHeight())) {
printf("GLCanvas::setFrame() - dropping frame!
");
}
}
/**
* Update the overlay pixels - expected to be in ABGR
* @param aPixels
* @param aWidth
* @param aHeight
*/
virtual void setOverlay(const void *aPixels, int aWidth, int aHeight) {
mOverlayBuffer.Update(aPixels, aWidth, aHeight);
}
/**
* Set the size of the canvas
* @param aWidth
* @param aHeight
*/
virtual void setSize(int aWidth, int aHeight) {
mCanvasWidth = aWidth;
mCanvasHeight = aHeight;
mCanvasSizeChanged = true;
}
/**
* Set the rendering coordinates of the video within the canvas
* @param aX0
* @param aY0
* @param aX1
* @param aY1
*/
virtual void setRenderingCoordinates(int aX0, int aY0, int aX1, int aY1) {
mX0 = aX0;
mY0 = aY0;
mX1 = aX1;
mY1 = aY1;
}
private:
void idle() {
mInputBuffer.Swap();
mOverlayBuffer.Swap();
}
bool init() {
if (!mInitialized) {
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glClearDepth(1.0f);
glEnable(GL_TEXTURE_2D);
glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_GREATER, 0.5f);
mInputBuffer.Initialize(); // RGB
mOverlayBuffer.Initialize(); // RGBA
mInitialized = true;
}
return mInitialized;
}
void dispose() {
mInputBuffer.Destroy();
mOverlayBuffer.Destroy();
}
bool render() {
bool tResult = true;
/*
* Clearing is needed because of changes between HD -> SD and canvas resize
*/
if (mCanvasSizeChanged) {
mCanvasSizeChanged = false;
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0.0f, (GLdouble) mCanvasWidth, 0.0f, (GLdouble) mCanvasHeight, -1.0f, 1.0f);
glViewport(0, 0, mCanvasWidth, mCanvasHeight);
// glHint(GL_POLYGON_SMOOTH_HINT, GL_NICEST);
glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST);
glMatrixMode(GL_MODELVIEW);
}
glClear(GL_COLOR_BUFFER_BIT);
/*
* Render the input video texture
*/
glBindTexture(GL_TEXTURE_2D, mInputBuffer.mTextureId);
if (0 == mX1 || 0 == mY1) {
p”rintf(setRenderingCoordinates() wasn't called, defaulting to canvas size!
”);
mX1 = mCanvasWidth;
mY1 = mCanvasHeight;
}
glBegin(GL_QUADS);
glTexCoord2i(0, 1); glVertex2i(mX0, mY0);
glTexCoord2i(1, 1); glVertex2i(mX1, mY0);
glTexCoord2i(1, 0); glVertex2i(mX1, mY1);
glTexCoord2i(0, 0); glVertex2i(mX0, mY1);
glEnd();
glBindTexture(GL_TEXTURE_2D, 0);
/*
* Render the overlay
*/
if (mOverlayBuffer.mWidth > 0) {
glBindTexture(GL_TEXTURE_2D, mOverlayBuffer.mTextureId);
glBegin(GL_QUADS);
glTexCoord2i(0, 1); glVertex2i(0, 0);
glTexCoord2i(1, 1); glVertex2i(mCanvasWidth, 0);
glTexCoord2i(1, 0); glVertex2i(mCanvasWidth, mCanvasHeight);
glTexCoord2i(0, 0); glVertex2i(0, mCanvasHeight);
glEnd();
glBindTexture(GL_TEXTURE_2D, 0);
}
glXSwapBuffers(mDisplay, mDrawable);
return tResult;
}
uint64_t getTime() {
struct timeval tNow;
gettimeofday(&tNow, NULL);
return ((uint64_t)tNow.tv_sec * 1000000 + tNow.tv_usec);
}
GLXContext createScreenCorrectContext(Drawable aDrawable, GLXContext aSharedCtx = NULL) {
GLXContext tContext = NULL;
GLint tAttributes[] = {GLX_RGBA, GLX_DEPTH_SIZE, 24, GLX_DOUBLEBUFFER, None};
XVisualInfo *tVisualInfo = glXChooseVisual(mDisplay, XDefaultScreen(mDisplay), tAttributes);
if (tVisualInfo) {
tContext = glXCreateContext(mDisplay, tVisualInfo, aSharedCtx, GL_TRUE);;
if (tContext) {
if (XScreenCount(mDisplay) > 1) {
int tResult = glXMakeCurrent(mDisplay, aDrawable, tContext);
// When failure to make current, probably on the wrong screen
if (GL_TRUE != tResult) {
Display *tDisplay = XOpenDisplay(":0.1");
printf("GLCanvas::createScreenCorrectContext() - glXMakeCurrent() failed on multi-screen display, trying other screen
");
if (tDisplay) {
// Destroy the context for the default display
glXDestroyContext(tDisplay, tContext);
tContext = NULL;
// Currently only support screen 1 TI12/TI14
tVisualInfo = glXChooseVisual(tDisplay, 1, tAttributes);
if (tVisualInfo) {
tContext = glXCreateContext(tDisplay, tVisualInfo, aSharedCtx, GL_TRUE);
if (tContext) {
setDisplay(tDisplay);
} else {
printf("GLCanvas::createScreenCorrectContext() - glXCreateContext() for screen 1 failed!
");
}
} else {
printf("GLCanvas::createScreenCorrectContext() - glXChooseVisual() for screen 1 failed!
");
}
} else {
printf("GLCanvas::createScreenCorrectContext() - XOpenDisplay(:0.1) failed!
");
}
} else {
// Ensure to undo the successful glXMakeCurrent()
glXMakeCurrent(mDisplay, None, NULL);
}
} else {
printf("GLCanvas::createScreenCorrectContext() - detected a single screen
");
}
} else {
printf("GLCanvas::createScreenCorrectContext() - glXCreateContext() failed!
");
}
} else {
printf("GLCanvas::createScreenCorrectContext() - glXChooseVisual() for default screen failed!
");
}
return tContext;
}
virtual void start() {
printf("GLCanvas::start() starting render thread...
");
if (GL_TRUE == glXMakeCurrent(mDisplay, mDrawable, mContext)) {
if (init()) {
uint64_t tStop, tStart;
while (mRendering) {
render();
tStart = getTime();
idle();
tStop = getTime();
/*
* Assuming a 60 Hz refresh, there should be about 16 ms of idle
* time to update overlays and video input, report error when the idle
* operations take more than 90% of this allowed time...
*/
if (tStop - tStart > 14400) {
printf("GLCanvas::start(%x) - idle() time: %lu taking more than 90%% of available time
", (int)mDrawable, tStop - tStart);
}
}
dispose();
} else {
printf("GLCanvas::start() - Failed to initialize GL parameters
");
}
glXMakeCurrent(mDisplay, None, NULL);
} else {
printf("GLCanvas::start() - Failed to make context current
");
}
printf("GLCanvas::start() thread ended...
");
}
static pthread_mutex_t sMutex = PTHREAD_MUTEX_INITIALIZER;
Thread mRenderThread;
bool mRendering;
bool mInitialized;
bool mCanvasSizeChanged;
GLXContext mContext;
pthread_mutex_t mWaitMutex;
pthread_cond_t mWaitCondition;
GLBufferObject mInputBuffer;
GLBufferObject mOverlayBuffer;
};