Happy New Year Dark Photon, I have spent the whole week to write the code and test for depth with R32F. But it still does not show the right depth image. Here below are the code which I have edited from a CUDA interop with texture color. Where can be the error?
The main.cpp:
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <math.h>
#define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
#include <GL/glew.h>
#include <glut.h>
// CUDA headers
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include "Postprocess.cu"
#define SRC_BUFFER 0
#define DST_BUFFER 1
#define SRC_DEPTH 2
int g_iGLUTWindowHandle = 0;
int g_iWindowPositionX = 0;
int g_iWindowPositionY = 0;
int g_iWindowWidth = 512;
int g_iWindowHeight = 512;
int g_iImageWidth = g_iWindowWidth;
int g_iImageHeight = g_iWindowHeight;
float g_fRotate[3] = { 0.0f, 0.0f, 0.0f }; // Rotation parameter for scene object.
bool g_bAnimate = true; // Animate the scene object
bool g_bPostProcess = true; // Enable/disable the postprocess effect.
float g_fBlurRadius = 2.0f; // Radius of 2D convolution blur performed in postprocess step.
GLuint g_GLFramebuffer = 0; // Frame buffer object for off-screen rendering.
GLuint g_GLColorAttachment0 = 0; // Color texture to attach to frame buffer object.
GLuint g_GLDepthAttachment = 0; // Depth buffer to attach to frame buffer object.
GLuint depthTexture = 0;
GLuint g_GLPostprocessTexture = 0; // This is where the result of the post-process effect will go.
// This is also the final texture that will be blit to the back buffer for viewing.
// The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
// buffer that can be used in a CUDA kernel.
// We need 2 resource: One will be used to map to the color attachment of the
// framebuffer and used read-only from the CUDA kernel (SRC_BUFFER),
// the second is used to write the postprocess effect to (DST_BUFFER).
cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0, 0, 0};
// Initialize OpenGL/GLUT
bool InitGL( int argc, char* argv[] );
// Initialize CUDA for OpenGL
void InitCUDA();
// Render a texture object to the current framebuffer
void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );
// Create a framebuffer object that is used for offscreen rendering.
void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture );
void DeleteFramebuffer( GLuint& framebuffer );
void CreatePBO( GLuint& bufferID, size_t size );
void DeletePBO( GLuint& bufferID );
void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
void DeleteTexture( GLuint& texture );
void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height );
void DeleteDepthBuffer( GLuint& depthBuffer );
// Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );
void IdleGL();
void DisplayGL();
void KeyboardGL( unsigned char key, int x, int y );
void ReshapeGL( int w, int h );
void Cleanup( int errorCode, bool bExit = true )
{
if ( g_iGLUTWindowHandle != 0 )
{
glutDestroyWindow( g_iGLUTWindowHandle );
g_iGLUTWindowHandle = 0;
}
if ( bExit )
{
exit( errorCode );
}
}
// Create a pixel buffer object
void CreatePBO( GLuint& bufferID, size_t size )
{
// Make sure the buffer doesn't already exist
DeletePBO( bufferID );
glGenBuffers( 1, &bufferID );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, bufferID );
glBufferData( GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
}
void DeletePBO( GLuint& bufferID )
{
if ( bufferID != 0 )
{
glDeleteBuffers( 1, &bufferID );
bufferID = 0;
}
}
// Create a texture resource for rendering to.
void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
{
// Make sure we don't already have a texture defined here
DeleteTexture( texture );
glGenTextures( 1, &texture );
glBindTexture( GL_TEXTURE_2D, texture );
// set basic parameters
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Create texture data (4-component unsigned byte)
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
// Unbind the texture
glBindTexture( GL_TEXTURE_2D, 0 );
}
void DeleteTexture( GLuint& texture )
{
if ( texture != 0 )
{
glDeleteTextures(1, &texture );
texture = 0;
}
}
void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
{
DeleteTexture(txDepth);
glGenTextures(1, &txDepth);
glBindTexture(GL_TEXTURE_2D, txDepth);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
glBindTexture(GL_TEXTURE_2D, 0);
}
void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height )
{
// Delete the existing depth buffer if there is one.
DeleteDepthBuffer( depthBuffer );
glGenRenderbuffers( 1, &depthBuffer );
glBindRenderbuffer( GL_RENDERBUFFER, depthBuffer );
glRenderbufferStorage( GL_RENDERBUFFER, GL_DEPTH_COMPONENT, width, height );
// Unbind the depth buffer
glBindRenderbuffer( GL_RENDERBUFFER, 0 );
}
void DeleteDepthBuffer( GLuint& depthBuffer )
{
if ( depthBuffer != 0 )
{
glDeleteRenderbuffers( 1, &depthBuffer );
depthBuffer = 0;
}
}
void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture )
{
// Delete the existing framebuffer if it exists.
DeleteFramebuffer( framebuffer );
glGenFramebuffers( 1, &framebuffer );
glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );
glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorAttachment0, 0 );
glFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTexture, 0);
glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depthAttachment );
// Check to see if the frame buffer is valid
GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
{
std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
}
// Unbind the frame buffer
glBindFramebuffer( GL_FRAMEBUFFER, 0 );
}
void DeleteFramebuffer( GLuint& framebuffer )
{
if ( framebuffer != 0 )
{
glDeleteFramebuffers( 1, &framebuffer );
framebuffer = 0;
}
}
void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
{
// Map the GL texture resource with the CUDA resource
cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
}
void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
{
if ( cudaResource != 0 )
{
cudaGraphicsUnregisterResource( cudaResource );
cudaResource = 0;
}
}
int main( int argc, char* argv[] )
{
glutInit(&argc, argv);
// Init GLUT
if ( !InitGL( argc, argv ) )
{
std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
}
InitCUDA();
// Startup our GL render loop
glutMainLoop();
}
bool InitGL( int argc, char* argv[] )
{
// Material property constants.
const GLfloat fRed[] = { 1.0f, 0.1f, 0.1f, 1.0f };
const GLfloat fWhite[] = { 1.0f, 1.0f, 1.0f, 1.0f };
int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);
// glutInit( &argc, argv );
glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
iScreenHeight / 2 - g_iWindowHeight / 2 );
glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );
g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );
// Register GLUT callbacks
glutDisplayFunc(DisplayGL);
glutKeyboardFunc(KeyboardGL);
glutReshapeFunc(ReshapeGL);
glutIdleFunc(IdleGL);
// Init GLEW
glewInit();
GLboolean gGLEW = glewIsSupported(
"GL_VERSION_3_1 "
"GL_ARB_pixel_buffer_object "
"GL_ARB_framebuffer_object "
"GL_ARB_copy_buffer "
);
int maxAttachemnts = 0;
glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );
if ( !gGLEW ) return false;
glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
glDisable( GL_DEPTH_TEST );
// Setup the viewport
glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );
// Setup the projection matrix
glMatrixMode( GL_PROJECTION );
glLoadIdentity();
gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.1, 1.0 );
glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );
// Enable one light.
glEnable( GL_LIGHT0 );
glMaterialfv( GL_FRONT_AND_BACK, GL_DIFFUSE, fRed );
glMaterialfv( GL_FRONT_AND_BACK, GL_SPECULAR, fWhite );
glMaterialf( GL_FRONT_AND_BACK, GL_SHININESS, 60.0f );
return true;
}
void InitCUDA()
{
// We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
cudaGLSetGLDevice(0);
}
void IdleGL()
{
if (g_bAnimate)
{
g_fRotate[0] += 0.2; while(g_fRotate[0] > 360.0f) g_fRotate[0] -= 360.0f; // Increment and clamp
g_fRotate[1] += 0.6; while(g_fRotate[1] > 360.0f) g_fRotate[1] -= 360.0f;
g_fRotate[2] += 1.0; while(g_fRotate[2] > 360.0f) g_fRotate[2] -= 360.0f;
}
glutPostRedisplay();
}
// Render the initial scene
void RenderScene()
{
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
glMatrixMode( GL_PROJECTION );
glLoadIdentity();
gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );
glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
glTranslatef( 0.0f, 0.0f, -3.0f );
glRotatef( g_fRotate[0], 1.0f, 0.0f, 0.0f );
glRotatef( g_fRotate[1], 0.0f, 1.0f, 0.0f );
glRotatef( g_fRotate[2], 0.0f, 0.0f, 1.0f );
glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );
glEnable( GL_LIGHTING );
glEnable( GL_DEPTH_TEST );
glDepthFunc( GL_LESS );
glutSolidTeapot( 1.0 );
}
// Perform a post-process effect on the current framebuffer (back buffer)
void Postprocess()
{
if ( g_bPostProcess )
{
PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );
}
else
{
// No postprocess effect. Just copy the contents of the color buffer
// from the framebuffer (where the scene was rendered) to the
// post-process texture. The postprocess texture will be rendered to the screen
// in the next step.
glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
glBindTexture( GL_TEXTURE_2D, g_GLPostprocessTexture );
glCopyTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, 0, 0, g_iImageWidth, g_iImageHeight, 0 );
glBindTexture( GL_TEXTURE_2D, 0 );
glBindFramebuffer( GL_FRAMEBUFFER, 0 );
}
}
void DisplayGL()
{
// Bind the framebuffer that we want to use as the render target.
glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
RenderScene();
// Unbind the framebuffer so we render to the back buffer again.
glBindFramebuffer( GL_FRAMEBUFFER, 0 );
Postprocess();
// Blit the image full-screen
DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );
glutSwapBuffers();
glutPostRedisplay();
}
void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
{
glBindTexture(GL_TEXTURE_2D, texture);
glEnable(GL_TEXTURE_2D);
glDisable(GL_DEPTH_TEST);
glDisable(GL_LIGHTING);
glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
glMatrixMode(GL_PROJECTION);
glPushMatrix();
glLoadIdentity();
glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);
glMatrixMode( GL_MODELVIEW);
glLoadIdentity();
glPushAttrib( GL_VIEWPORT_BIT );
glViewport(x, y, width, height );
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
glEnd();
glPopAttrib();
glMatrixMode(GL_PROJECTION);
glPopMatrix();
glDisable(GL_TEXTURE_2D);
}
void KeyboardGL( unsigned char key, int x, int y )
{
switch( key )
{
case '\033': // escape quits
case 'Q': // Q quits
case 'q': // q quits
{
// Cleanup up and quit
Cleanup(0);
}
break;
}
glutPostRedisplay();
}
void ReshapeGL( int w, int h )
{
h = std::max(h, 1);
g_iWindowWidth = w;
g_iWindowHeight = h;
g_iImageWidth = w;
g_iImageHeight = h;
// Create a surface texture to render the scene to.
CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
// Create a depth buffer for the frame buffer object.
CreateDepthBuffer( g_GLDepthAttachment, g_iImageWidth, g_iImageHeight );
CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);
// Attach the color and depth textures to the framebuffer.
CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0, g_GLDepthAttachment , depthTexture);
// Create a texture to render the post-process effect to.
CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );
// Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.
CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
// Map the post-process texture to the CUDA resource so it can be
// written in the kernel.
CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );
glutPostRedisplay();
}
And here is postProcess.cu:
#include <cuda_runtime_api.h>
#include "Postprocess.h"
#define BLOCK_SIZE 16 // block size
texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;
__global__ void PostprocessKernel( uchar4* dst, unsigned int imgWidth, unsigned int imgHeight )
{
unsigned int tx = threadIdx.x;
unsigned int ty = threadIdx.y;
unsigned int bw = blockDim.x;
unsigned int bh = blockDim.y;
// Non-normalized U, V coordinates of input texture for current thread.
unsigned int u = ( bw * blockIdx.x ) + tx;
unsigned int v = ( bh * blockIdx.y ) + ty;
// Early-out if we are beyond the texture coordinates for our texture.
if ( u > imgWidth || v > imgHeight ) return;
unsigned int index = ( v * imgWidth ) + u;
// uchar4 color = tex2D( texRef, u, v );
float depth = tex2D(depRef, u, v);
// dst[index] = make_uchar4( color.x, color.y, color.z, 1);
dst[index] = make_uchar4( depth*255, depth*255, depth*255, 1);
}
uchar4* g_dstBuffer = NULL;
size_t g_BufferSize = 0;
void PostprocessCUDA( cudaGraphicsResource_t& dst, cudaGraphicsResource_t& src, cudaGraphicsResource_t& srcDepth, unsigned int width, unsigned int height)
{
cudaGraphicsResource_t resources[2] = { srcDepth, dst };
// Map the resources so they can be used in the kernel.
cudaGraphicsMapResources( 2, resources ) ;
// cudaArray* srcArray;
cudaArray* dstArray;
cudaArray* srcDepthArray;
// Get a device pointer to the OpenGL buffers
// cudaGraphicsSubResourceGetMappedArray( &srcArray, src, 0, 0 ) ;
cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0 ) ;
cudaGraphicsSubResourceGetMappedArray( &dstArray, dst, 0, 0 ) ;
// Map the source texture to a texture reference.
// cudaBindTextureToArray( texRef, srcArray );
cudaBindTextureToArray( depRef, srcDepthArray );
// Destination buffer to store the result of the postprocess effect.
size_t bufferSize = width * height * sizeof(uchar4);
if ( g_BufferSize != bufferSize )
{
if ( g_dstBuffer != NULL )
{
cudaFree( g_dstBuffer );
}
// Only re-allocate the global memory buffer if the screen size changes,
// or it has never been allocated before (g_BufferSize is still 0)
g_BufferSize = bufferSize;
cudaMalloc( &g_dstBuffer, g_BufferSize );
}
// Compute the grid size
size_t blocksW = (size_t)ceilf( width / (float)BLOCK_SIZE );
size_t blocksH = (size_t)ceilf( height / (float)BLOCK_SIZE );
dim3 gridDim( blocksW, blocksH, 1 );
dim3 blockDim( BLOCK_SIZE, BLOCK_SIZE, 1 );
PostprocessKernel<<< gridDim, blockDim >>>( g_dstBuffer, width, height );
// Copy the destination back to the source array
cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice );
// Unbind the texture reference
// cudaUnbindTexture( texRef);
cudaUnbindTexture( depRef);
// Unmap the resources again so the texture can be rendered in OpenGL
cudaGraphicsUnmapResources( 2, resources ) ;
}
And here is CMakeLists.txt
cmake_minimum_required (VERSION 2.8)
project (PostprocessLinux)
find_package(CUDA)
include_directories(${PROJECT_SOURCE_DIR}/include)
link_directories(${PROJECT_SOURCE_DIR}/lib)
cuda_include_directories("/usr/local/cuda-6.5/include")
link_directories("/usr/local/cuda-6.5/lib64")
cuda_compile(Postprocess_O, Postprocess.cu)
cuda_compile(Main_O main.cu)
cuda_add_executable (test ${Postprocess_O} ${Main_O} )
target_link_libraries(test "-lglut" "-lGL" "-lGLU" "-lGLEW" "-lpthread" "-lcudart")
INSTALL(TARGETS test RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
To build it, you need make a build directory and cmake … and then make