Multisample FBO

Hi,

I am trying to render to an off screen FBO with multisampling and then read the finished buffer to PC RAM.

This is a relatively new extension and I haven’t found any sample on the net that would tell me how to do this. I know that I have to use BlitFramebufferEXT to copy to a standard renderbuffer before ReadPixels can work but I found even less information about BlitFramebuffer and what I tried so far does not work.

If anyone can help with some code or a link, I would be very grateful.

It’s a work in progress, but it does what it’s supposed to do.

Multisample blits are currently inverted on NVIDIA’s drivers (95 and 100 series)

namespace {
	/// Chooses the internal format used for FBO depth attachments.
	///
	/// Defaults to a 24-bit depth component. When the GL_VENDOR string contains
	/// "ATI Technologies Inc." a 16-bit depth component is used instead and a
	/// message is logged.
	///
	/// @return GL_DEPTH_COMPONENT24_ARB, or GL_DEPTH_COMPONENT16_ARB on ATI.
	GLenum GetValidFBODepthFormat() {

		GLenum internalDepthFormat = GL_DEPTH_COMPONENT24_ARB;

		// glGetString() returns a null pointer on error; constructing a std::string
		// from a null pointer is undefined behaviour, so fall back to an empty vendor.
		const char * const vendorRaw = reinterpret_cast< const char * >( glGetString(GL_VENDOR) );
		const std::string vendor = vendorRaw ? vendorRaw : "";
		if( vendor.find("ATI Technologies Inc.") != std::string::npos ) {
			common->RSafePrintf("Using ATI-safe DEPTH_COMPONENT16\n");
			internalDepthFormat = GL_DEPTH_COMPONENT16_ARB;
		}

		return internalDepthFormat;

	}

	/// Computes rt.width / rt.height from the window size and the post-process cvars.
	///
	/// The requested size is the window size scaled by "r_postProcessSuperSamples".
	/// The final size is then chosen in this order:
	///   1. "r_postProcessAllowNPOT" set: the requested size is used as is.
	///   2. "r_backBufferWidth" non-zero: "r_backBufferWidth" / "r_backBufferHeight" are used.
	///   3. otherwise: the requested size passed through PowerOf2Ceil().
	///
	/// @param rt  render target whose width and height are overwritten.
	void R_ComputeFBODimensions( renderTarget_t &rt ) {
		float supersamples = cvarSystem->GetFloat("r_postProcessSuperSamples");

		// NOTE(review): arguments are passed as (value, 16.0f, 0.025f); this assumes
		// NxMath::clamp takes (value, hi, low) - confirm against the NxMath header.
		supersamples = NxMath::clamp( supersamples, 16.0f, 0.025f );
		common->Printf("Samples: %4.4f\n", supersamples );

		const int reqBufferWidth  = static_cast< int >( float(appWindow.width)  * supersamples );
		const int reqBufferHeight = static_cast< int >( float(appWindow.height) * supersamples );
		if( cvarSystem->GetBool("r_postProcessAllowNPOT") ) {
			rt.width  = reqBufferWidth;
			rt.height = reqBufferHeight;
		}
		else if( cvarSystem->GetInteger("r_backBufferWidth") ) {
			// Only the width cvar is tested; the height cvar is taken as is.
			rt.width  = cvarSystem->GetInteger("r_backBufferWidth");
			rt.height = cvarSystem->GetInteger("r_backBufferHeight");
		}
		else {
			rt.width  = PowerOf2Ceil( reqBufferWidth  );
			rt.height = PowerOf2Ceil( reqBufferHeight );
		}
		// rt.width  = std::min( PowerOf2Ceil(cvarSystem->GetInteger("r_rtt_maxWidth") ), rt.width  );
		// rt.height = std::min( PowerOf2Ceil(cvarSystem->GetInteger("r_rtt_maxHeight")), rt.height );
	}

}

/// Rebuilds the depth attachment of the framebuffer bound to GL_FRAMEBUFFER_EXT.
///
/// Any depth renderbuffer or depth texture attachment is detached first. What is
/// attached afterwards is selected by the "r_postProcessDepthTexture" cvar:
///   1    - the depth texture rt.fb_depthTex (NEAREST filtered)
///   2    - a depth renderbuffer; multisampled through NV_framebuffer_multisample_coverage
///          or EXT_framebuffer_multisample when available, plain storage otherwise
///   else - no depth attachment
///
/// rt.width / rt.height are read here but not computed here.
///
/// @param rt                     render target supplying size, texture target and depth texture
/// @param multisamplesRequested  sample count (used as the coverage sample count on the NV path)
/// @param colorSamplesRequested  color sample count; only used on the NV coverage path
void R_BindFBODepth(  renderTarget_t &rt, int multisamplesRequested, int colorSamplesRequested = 0 ) {
	
	// Detach depth _renderBuffer_, if there was one
	//
	glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, 0 );

	// Detach depth _texture_ if there was one
	//
	glFramebufferTexture2DEXT( GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, rt.textureTargetType, 0, 0 );
	
	const int depthType = cvarSystem->GetInteger("r_postProcessDepthTexture") ;
	
	// TODO: faster to use depth RENDER BUFFER?
	
	const GLint internalDepthFormat = GetValidFBODepthFormat();
	const GLenum type = GL_UNSIGNED_BYTE;

	
	if( depthType == 1 ) {

		// Allocate (no initial data) and attach the depth texture.
		g_rgl->BindTexture(  rt.textureTargetType, rt.fb_depthTex->_handle );
		CHECKGL;
		g_rgl->UploadTextureData2D( rt.textureTargetType, 0, internalDepthFormat, rt.width, rt.height, 0, GL_DEPTH_COMPONENT, type, NULL );
		CHECKGL;
		glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
		glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
		CHECKGL;
		glFramebufferTexture2DEXT( GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, rt.textureTargetType, rt.fb_depthTex->_handle, 0 );


		g_rgl->BindTexture( rt.textureTargetType, 0);
		CHECKGL;


	}
	else if( depthType == 2 ) {
		
		// TODO: move this and color rbs into renderTarget structure
		//
		// NOTE(review): a new renderbuffer name is generated on every call and the
		// previous one is not deleted in this function - looks like a leak per
		// regenerate. It cannot simply be reused or deleted here because the static
		// is shared by every render target that goes through this function.
		static GLuint depth_rb = 0;
		glGenRenderbuffersEXT( 1, &depth_rb );
		// initialize depth renderbuffer
		glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, depth_rb);

		//////////////////////////////////////////////////////////////////////////
		//
		if( glrimp.exts.NV_framebuffer_multisample_coverage ) {

			int coverageSamplesRequested = multisamplesRequested;

			glRenderbufferStorageMultisampleCoverageNV( GL_RENDERBUFFER_EXT, coverageSamplesRequested, colorSamplesRequested, internalDepthFormat, rt.width, rt.height );
			CHECKGL;
			// Query what the driver actually allocated and report any mismatch.
			GLint givenCoverageSamples = -1;
			glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_COVERAGE_SAMPLES_NV, &givenCoverageSamples );
			GLint givenColorSamples = -1;
			glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_COLOR_SAMPLES_NV, &givenColorSamples );
			CHECKGL;

			if( givenCoverageSamples == multisamplesRequested ) {
				common->Printf(  COM_GREEN("%d multisample coverage format given\n"), givenCoverageSamples );
			}
			else {
				common->Printf( COM_YELLOW("%d multisample coverage format given, different from request\n"), givenCoverageSamples );
			}

			if( givenColorSamples == colorSamplesRequested ) {
				common->Printf(  COM_GREEN("%d multisample color format given\n"), givenColorSamples );
			}
			else {
				common->Printf( COM_YELLOW("%d multisample color format given, different from request\n"), givenColorSamples );
			}
		}
		else if( glrimp.exts.EXT_framebuffer_multisample ) {
			glRenderbufferStorageMultisampleEXT( GL_RENDERBUFFER_EXT, multisamplesRequested, internalDepthFormat, rt.width, rt.height );
		}
		else {
			glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, internalDepthFormat, rt.width, rt.height );
		}
		//////////////////////////////////////////////////////////////////////////


		// attach renderbuffer to framebuffer depth buffer
		glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, depth_rb);

	}


	// Summary of what was set up.
	common->Printf( COM_GREEN("  RenderTarget( %d, %d )\n"), rt.width, rt.height);
	if( depthType == 1 ) {
		common->Printf( COM_GREEN("    with depth texture\n") );
	}
	else if( depthType == 2) {
		common->Printf( COM_GREEN("    with depth render buffer\n") );
	}
	else {
		common->Printf( COM_GREEN("    with no depth information\n") );
	}
}


/// Rebuilds the color attachment (GL_COLOR_ATTACHMENT0_EXT) of the framebuffer bound
/// to GL_FRAMEBUFFER_EXT.
///
/// Recomputes rt.width / rt.height through R_ComputeFBODimensions(), detaches any
/// existing color renderbuffer or texture, then attaches what the
/// "r_postProcessColorTexture" cvar selects:
///   1    - the color texture rt.fb_colorTex (RGBA8), optionally with a mipmap chain
///          when "r_postProcessGenerateMipmaps" is set
///   2    - a color renderbuffer stored in rt.fb_colorRB; multisampled through
///          NV_framebuffer_multisample_coverage or EXT_framebuffer_multisample when
///          available, plain storage otherwise
///   else - no color attachment
///
/// @param rt                     render target to size and attach to
/// @param multisamplesRequested  sample count (used as the coverage sample count on the NV path)
/// @param colorSamplesRequested  color sample count; only used on the NV coverage path
void R_BindFBOColor( renderTarget_t &rt, int multisamplesRequested, int colorSamplesRequested = 0 ) {
	
	R_ComputeFBODimensions( rt );


	// Detach color _renderBuffer_, if there was one
	//
	glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, 0 );
	CHECKGL;

	// Detach color _texture_ if there was one
	//
	glFramebufferTexture2DEXT(    GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, rt.textureTargetType, 0, 0 );
	CHECKGL;


	const int colorType = cvarSystem->GetInteger("r_postProcessColorTexture");
	

	if( colorType == 1 ) {

		common->Printf("Allocating video memory for color texture\n");
		g_rgl->BindTexture( rt.textureTargetType, rt.fb_colorTex->_handle );
		CHECKGL;


#if 1
		const GLenum internalFormat = GL_RGBA8;
		const GLenum channelType = GL_UNSIGNED_BYTE;
#else
#pragma message("Using floating point FBO!")
		const GLenum internalFormat = GL_RGBA32F_ARB;
		const GLenum channelType = GL_FLOAT;
#endif
				
		
		// Allocate storage only (no initial pixel data).
		Renderer::GetSingleton()->UploadTextureData2D( rt.textureTargetType, 0, internalFormat, rt.width, rt.height, 0, GL_RGBA, channelType, NULL );
		CHECKGL;

		if( cvarSystem->GetBool("r_postProcessGenerateMipmaps") ) {
			// Establish the mipmap chain for the color texture
			glGenerateMipmapEXT( rt.textureTargetType );
			CHECKGL;

			// TODO: read about how texture biasing works in conjunction with filtering
			//
			glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR );
			glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
		}
		else {
			/*
			glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MIN_FILTER, GL_LINEAR );	
			glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
			*/
			glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
			glTexParameteri(  rt.textureTargetType, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
		}
		CHECKGL;
		g_rgl->BindTexture( rt.textureTargetType, 0 );

		CHECKGL;
		glFramebufferTexture2DEXT( GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, rt.textureTargetType, rt.fb_colorTex->_handle, 0 );
		CHECKGL;
		
	}
	else if( colorType == 2 ) {
		
		// NOTE(review): the previous rt.fb_colorRB name is overwritten without being
		// deleted, so an earlier renderbuffer appears to be leaked on every call.
		// Whether rt.fb_colorRB is reliably initialised before the first call is not
		// visible here, so it is not deleted blindly - confirm and fix in the owner.
		rt.fb_colorRB = 0;

		glGenRenderbuffersEXT( 1, &rt.fb_colorRB );
		
		// initialize color renderbuffer
		glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, rt.fb_colorRB );

		//////////////////////////////////////////////////////////////////////////
		//
		if( glrimp.exts.NV_framebuffer_multisample_coverage ) {

			int coverageSamplesRequested = multisamplesRequested;

			glRenderbufferStorageMultisampleCoverageNV( GL_RENDERBUFFER_EXT, coverageSamplesRequested, colorSamplesRequested, GL_RGBA, rt.width, rt.height );
			CHECKGL;
			// Query what the driver actually allocated and report any mismatch.
			GLint givenCoverageSamples = -1;
			glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_COVERAGE_SAMPLES_NV, &givenCoverageSamples );
			GLint givenColorSamples = -1;
			glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_COLOR_SAMPLES_NV, &givenColorSamples );
			CHECKGL;

			if( givenCoverageSamples == multisamplesRequested ) {
				common->Printf(  COM_GREEN("%d multisample coverage samples given\n"), givenCoverageSamples );
			}
			else {
				// Format string now has a conversion for the requested count, which was
				// already being passed as the second argument.
				common->Printf( COM_YELLOW("%d multisample coverage samples given, different from requested %d\n"), givenCoverageSamples, coverageSamplesRequested );
			}

			if( givenColorSamples == colorSamplesRequested ) {
				common->Printf(  COM_GREEN("%d multisample color samples given\n"), givenColorSamples );
			}
			else {
				common->Printf( COM_YELLOW("%d multisample color samples given, different from requested %d\n"), givenColorSamples, colorSamplesRequested );
			}
			

		}
		else if( glrimp.exts.EXT_framebuffer_multisample ) {
			glRenderbufferStorageMultisampleEXT( GL_RENDERBUFFER_EXT, multisamplesRequested, GL_RGBA, rt.width, rt.height );
			CHECKGL;
			GLint givenSamples = -1;
			glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_SAMPLES_EXT, &givenSamples );
			CHECKGL;

			if( givenSamples == multisamplesRequested ) {
				common->Printf(  COM_GREEN("%d multisample color format given\n"), givenSamples );
			}
			else {
				common->Printf( COM_YELLOW("%d multisample color format given, different from request\n"), givenSamples );
			}
		}
		else {
			// glRenderbufferStorageMultisampleEXT( GL_RENDERBUFFER_EXT, /*NOTE ZERO*/ 0, internalDepthFormat, rt.width, rt.height );
			// Is the same thing as
			//
			glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, GL_RGBA, rt.width, rt.height );
			CHECKGL;
			//////////////////////////////////////////////////////////////////////////
		}

		

		// attach renderbuffer to framebuffer color buffer
		glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, rt.fb_colorRB );
		CHECKGL;
	}
	else {
		// nothing
	}

	// Summary of what was set up.
	if( colorType == 1 ) {
		common->Printf( COM_GREEN("    with color texture\n") );
	}
	else if( colorType == 2) {
		common->Printf( COM_GREEN("    with color render buffer\n") );
	}
	else {
		common->Printf( COM_GREEN("    with no color information\n") );
	}
}


/// Binds rt's framebuffer object as the current render target, optionally (re)creating it.
///
/// Does nothing when EXT_framebuffer_object is unavailable.
///
/// With `regenerate` set, missing FBO / texture objects are created, the texture target
/// is chosen from "r_postProcessUseNVRECT", and the color attachment (and, unless
/// `disableMultisample` is set, the depth attachment) is rebuilt. Without it, the
/// existing FBO is simply bound.
///
/// Afterwards the scissor state is set from "r_postProcessScissorClip", framebuffer
/// completeness is checked, and "r_postProcessEnable" is set to 0 when the framebuffer
/// is incomplete. On return rt.fbo is bound to GL_FRAMEBUFFER_EXT.
///
/// @param rt                  render target to bind; rt.fbo must already exist when
///                            `regenerate` is false (it is dereferenced unchecked)
/// @param regenerate          rebuild attachments before binding
/// @param disableMultisample  request zero samples; also skips the depth attachment
void R_BindFBO( renderTarget_t &rt, bool regenerate, bool disableMultisample = false ) {

	if( !glrimp.exts.EXT_framebuffer_object ) {
		return; 
	}

	if( regenerate ) {
		// Sample counts are only needed when attachments are rebuilt, so the cvars
		// are read here rather than on every bind.
		const int coverageSamples = glrimp.exts.EXT_framebuffer_multisample && !disableMultisample 
			? cvarSystem->GetInteger("r_postProcessMultisamples") : 0;
		const int colorSamples = glrimp.exts.NV_framebuffer_multisample_coverage && !disableMultisample 
			? cvarSystem->GetInteger("r_postProcessColorSamples") : 0;

		common->Printf("Reparameterizing render target\n");


		if( !rt.fbo || !rt.fbo->_handle ) {
			common->Printf(" Generating FBO\n");
			rt.fbo = Renderer::GetSingleton()->GenFrameBufferObject();

		}
		CHECKGL;
		
		if( !rt.fb_colorTex2D || !rt.fb_colorTex2D->_handle ) {
			common->Printf(" Generating FBO's color texture2D\n");
			rt.fb_colorTex2D = Renderer::GetSingleton()->GenTexture();
		}
		if( !rt.fb_depthTex2D || !rt.fb_depthTex2D->_handle) {
			common->Printf(" Generating FBO's depth texture2D\n");
			rt.fb_depthTex2D = Renderer::GetSingleton()->GenTexture();
		}
		
		if( !rt.fb_colorTexRect || !rt.fb_colorTexRect->_handle ) {
			common->Printf(" Generating FBO's color textureRect\n");
			rt.fb_colorTexRect = Renderer::GetSingleton()->GenTexture();
		}
		if( !rt.fb_depthTexRect || !rt.fb_depthTexRect->_handle) {
			common->Printf(" Generating FBO's depth textureRect\n");
			rt.fb_depthTexRect = Renderer::GetSingleton()->GenTexture();
		}
		CHECKGL;

		// Select which pair of textures (rectangle or 2D) backs the attachments.
		if( cvarSystem->GetBool("r_postProcessUseNVRECT") ) {
			rt.textureTargetType = GL_TEXTURE_RECTANGLE_NV;
			
			rt.fb_colorTex = rt.fb_colorTexRect;
			rt.fb_depthTex = rt.fb_depthTexRect;
		}
		else {
			rt.textureTargetType = GL_TEXTURE_2D;
			
			rt.fb_colorTex = rt.fb_colorTex2D;
			rt.fb_depthTex = rt.fb_depthTex2D;
		}

		// The attachment helpers operate on whatever is bound to GL_FRAMEBUFFER_EXT.
		glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, rt.fbo->_handle );
		CHECKGL;

		R_BindFBOColor( rt, coverageSamples, colorSamples );
		CHECKGL;

		// NOTE(review): depth is only attached when multisampling is enabled;
		// presumably the non-multisampled target is a resolve target - confirm.
		if( !disableMultisample ) {
			R_BindFBODepth( rt, coverageSamples, colorSamples );
		}
		CHECKGL;

		// common->DPrintf( COM_YELLOW("   %d multisample framebuffer requested\n"), coverageSamples );
	
	}
	else {
		
		//Set of state that can change on a framebuffer bind
		// - AUX_BUFFERS, MAX_DRAW_BUFFERS, STEREO,
		//	SAMPLES, X_BITS, DOUBLE_BUFFER and a few others
		glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, rt.fbo->_handle );
		CHECKGL;
	}

	if( cvarSystem->GetBool("r_postProcessScissorClip") ) {
		glEnable( GL_SCISSOR_TEST );
		glScissor( 0, 0, appWindow.width, appWindow.height );
	}
	else {
		glDisable( GL_SCISSOR_TEST );
	}


	
	glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, rt.fbo->_handle );
	GL_CHECK_FRAMEBUFFER_STATUS(GL_FRAMEBUFFER_EXT);

	// An incomplete framebuffer disables post-processing altogether.
	const GLenum status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ); 
	if( status != GL_FRAMEBUFFER_COMPLETE_EXT ) {
		cvarSystem->SetInteger("r_postProcessEnable", 0);
	}

	// The separate DRAW / READ binding points come from EXT_framebuffer_blit, so
	// they are only touched when that extension is present (R_UnbindFBO makes the
	// same distinction).
	if( glrimp.exts.EXT_framebuffer_blit ) {
		glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, rt.fbo->_handle );
		GL_CHECK_FRAMEBUFFER_STATUS(GL_DRAW_FRAMEBUFFER_EXT);


		glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, rt.fbo->_handle );
		GL_CHECK_FRAMEBUFFER_STATUS(GL_READ_FRAMEBUFFER_EXT);
	}

	glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 );
	GL_CHECK_FRAMEBUFFER_STATUS(GL_FRAMEBUFFER_EXT);

	// Leave the render target bound for the caller.
	glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, rt.fbo->_handle );
	GL_CHECK_FRAMEBUFFER_STATUS(GL_FRAMEBUFFER_EXT);
	
	
	CHECKGL;
}


void R_UnbindFBO( const renderTarget_t &rt ) {
	
	if( !glrimp.exts.EXT_framebuffer_object &#0124;&#0124; !cvarSystem->GetBool("r_postProcessEnable") ) {
		return;
	}

	CHECKGL;
	
	LuaManager * const svm = LuaManager::GetSingleton();

	const int samples = cvarSystem->GetInteger("r_postProcessMultisamples");
	const int blitType = cvarSystem->GetInteger("r_postProcessAllowFBBlit");
	const int srcWidth  = appWindow.width;
	const int srcHeight = appWindow.height;
	const bool flip = cvarSystem->GetBool("r_postProcessFlipFBBlit") ? true : false;
	const float scale = cvarSystem->GetFloat("r_postProcessScaleFBBlit");
	const GLenum filtering = cvarSystem->GetBool("r_postProcessFilterFBBlit") ? GL_LINEAR : GL_NEAREST;
//	const GLenum mask = svm->GetGlobalInt("g_testFBO") > 0 ? GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT : GL_COLOR_BUFFER_BIT;
	const GLenum mask = GL_COLOR_BUFFER_BIT;

#pragma warning( disable : 4244 )
	const int dstWidth  = (float)appWindow.width  * scale;
	const int dstHeight = (float)appWindow.height * scale;
#pragma warning( default : 4244 )

	

	const GLuint defaultWindow = 0;
	
	if( blitType == 2 ) {
		
		if( !rt.fbo	) {
			printf("FBO structure not allocated!
");
			exit(-1);
		}

			
		glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, rt.fbo->_handle );
		CHECKGL;

		glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, defaultWindow );
		CHECKGL;

		GL_CHECK_FRAMEBUFFER_STATUS( GL_FRAMEBUFFER_EXT );
		CHECKGL;
		GL_CHECK_FRAMEBUFFER_STATUS( GL_DRAW_FRAMEBUFFER_EXT );
		CHECKGL;
		GL_CHECK_FRAMEBUFFER_STATUS( GL_READ_FRAMEBUFFER_EXT );
		CHECKGL;

#if 0
		if( false ) {
			
			int readSamples = -1;
			glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, rt.fb_colorRB );
			CHECKGL;

			glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_SAMPLES_EXT, &readSamples );
			CHECKGL;

			int drawSamples = -1;
			// glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_SAMPLES_EXT, &drawSamples );
			// CHECKGL;

			printf("readSamples: %d, drawSamples: %d
", readSamples, drawSamples );

			GLint drawBuffer = -1;
			glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING_EXT, &drawBuffer );
			CHECKGL;

			GLint readBuffer = -1;
			glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING_EXT, &readBuffer );
			CHECKGL;

			printf("rb: %d, db: %d
", readBuffer, drawBuffer );

		}
#endif
		
		
		if( !cvarSystem->GetBool("r_postProcessSkipFBBlit") ) {
			
			
			glBlitFramebufferEXT( 0, 0, srcWidth,  srcHeight,
								  0, 0, dstWidth,  dstHeight,
								  mask, filtering );
			
			CHECKGL;

			if( flip && g_renderTarget1.fbo && g_renderTarget1.fbo->_handle ){
				glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, defaultWindow );
				CHECKGL;
				glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, g_renderTarget1.fbo->_handle );
				CHECKGL;

				glBlitFramebufferEXT( 0, srcHeight, srcWidth, 0, // reverse Y
									  0, 0, dstWidth, dstHeight,
									  mask, filtering );

				glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, g_renderTarget1.fbo->_handle );
				CHECKGL;
				glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, defaultWindow );
				CHECKGL;

				glBlitFramebufferEXT( 0, 0, srcWidth, srcHeight,
									  0, 0, dstWidth, dstHeight,
					                  mask, filtering );

				CHECKGL;
			}
			else if( false ) { // slow software? implementation

				glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, 0 );
				CHECKGL;
				glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, 0 );
				CHECKGL;
				
				glDepthFunc( GL_ALWAYS );
				glPixelZoom( 1.0, -1.0 );
				orthoSet( appWindow.width, appWindow.height );
				glRasterPos2f( 0, dstHeight * 0.5f );
				glCopyPixels( 0, 0, dstWidth, dstHeight, GL_COLOR );
				glPixelZoom( 1.0, 1.0 );
				glDepthFunc( GL_LEQUAL );
			}

		}
		CHECKGL;
		

		glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, defaultWindow );
		CHECKGL;
		glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, defaultWindow );
		CHECKGL;


		


	}
	else if( blitType == 3 ) {
		
		glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, rt.fbo->_handle );
		CHECKGL;
		glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, g_renderTarget1.fbo->_handle );
		glBlitFramebufferEXT( 0, 0, srcWidth, srcHeight,
			                  0, 0, dstWidth, dstHeight,
							  mask, filtering );

		
		
		glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, g_renderTarget1.fbo->_handle );
		glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, defaultWindow );
		glBlitFramebufferEXT( 0, 0, srcWidth, srcHeight,
							  0, 0, dstWidth, dstHeight,
							  mask, filtering );

		glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, defaultWindow );


	}
	else {
		if( glrimp.exts.EXT_framebuffer_blit ) {
			glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, 0 );
			glBindFramebufferEXT( GL_READ_FRAMEBUFFER_EXT, 0 );
		}
		else {
			glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 );
		}
		CHECKGL;
	}

	
}
  

Hi,

Thanks for the code. I don’t really know why you do quite a lot of things here.

I am trying to render to a multisample renderbuffer, then blit it to a ‘normal’ renderbuffer which I can use ReadPixels on. Could you take a look at the relevant parts of my code and tell me what am I doing wrong (I get a buffer with full of zeros. If I don’t use multisample FBO, this works but I do need the best possible antialiasing).

// Create the FBO that the scene is rendered into and make it current.
glGenFramebuffersEXT(1, &FBO_id);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, FBO_id);

// Depth attachment.
// NOTE(review): this storage is allocated without multisampling while the color
// storage below requests 16 samples; the reply later in this thread states that
// both attachments need the same number of samples.
glGenRenderbuffersEXT(1, &depth_rb);
glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, depth_rb );
glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT24, width, height );
glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, depth_rb );

// Color attachment: 16-sample RGBA renderbuffer.
glGenRenderbuffersEXT(1, &color_rb_multi);
glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, color_rb_multi );
glRenderbufferStorageMultisampleEXT( GL_RENDERBUFFER_EXT, 16, GL_RGBA, width, height );
glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, color_rb_multi);

…render scene

// Second FBO with a plain (non-multisampled) color renderbuffer; it receives the
// blitted image so that glReadPixels can be used on it.
glGenFramebuffersEXT(1, &FBO_id_single);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, FBO_id_single);
glGenRenderbuffersEXT(1, &color_rb);
glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, color_rb );
// NOTE(review): `widdth` is presumably a typo for `width`, which is the name used
// everywhere else in this snippet.
glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT,GL_RGBA, widdth, height );
glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, color_rb );

// Read from the multisampled FBO, draw into the single-sample FBO.
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, FBO_id);
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, FBO_id_single);

// NOTE(review): the blit rectangle ends at width-1 / height-1 while glReadPixels
// below reads width x height - confirm whether the last row and column are meant
// to be left out of the copy.
glBlitFramebufferEXT(0, 0, width-1, height-1, 0, 0, width-1, height-1, GL_COLOR_BUFFER_BIT, GL_NEAREST);

// Read the copied image back into client memory.
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, FBO_id_single);
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
glReadPixels(0,0,width,height,GL_BGRA,GL_UNSIGNED_INT_8_8_8_8_REV,m_BitmapBits);

Thanks.

you need to request the same number of samples for your depth buffer as for your color buffer. did you get a completeness error?

Thanks! It works now, I never thought it would be this trivial.

On to the next mystery:

Previously, when I only had one non-multisampled FBO, I timed ReadPixels at 1.5 - 2 msec, now it costs at least 7, which is unacceptable. And it is also strange, I still read from the same FBO.

My card is a 8800GTS, it should be fast. Can you tell me what could cause this and is there a way to optimize readpixels?

My other questions I would appreciate some help with:

  1. Do I get the best possible AA quality to render to a 16-sample buffer or is there anything else I can do to improve AA?

  2. Is it possible to get a final 32-bit image where the RGB components are non-premultiplied by the alpha? I guess this could be difficult because ideally I would need antialiasing in just the alpha, but not in the RGB components.

Thanks.

Did you put a glFinish before you start your timer? If not, then you’re timing more than you intend, and it now includes an additional 16x downsample.

You're right, I didn’t. It is still not so good, but at least I found out that it is misleading to time just one frame. My timer stops at 23 msec for one frame (I added a glFinish before starting the timer), but measures 128 msec for rendering and reading the exact same thing ten times.

I suspect that only the first frame takes longer than the other nine, although I have no idea why, after glfinish I should have a clean slate.

Any thoughts on my other questions?

Thanks,

Are you timing with EXT_timer_query?

No, with timeGetTime().

After I set my frame and renderbuffer objects, I call glFinish then timeGetTime. Then I render 10 glut teapots,
blit it to a normal framebuffer and call readpixels.

Then I call glFinish+timeGetTime again and measure the difference.

This costs on average 39 ms. If I do this in a loop ten times, it takes only 220,
which is very far from being ten times 39.

I have a new question for you helping souls:

glew reports that I don’t have GL_NV_framebuffer_multisample_coverage

Why is that? I have a 8800GTS with Forceware 97.92. Shouldn’t this extension work?

Thanks.

That extension is only supported in the 100 series drivers, which I think are Vista-only.

Try using the other extension I mentioned for timing; it’s very simple.

Well, that’s bad news, I don’t plan to switch from XP for some time.

I tried timing this way and the result is somewhat different:

26 msec doing it once, and 217 doing it ten times.

It is still a lot. Is there any reason why a glut teapot takes this much time to render?

I read that it is possible to read from the VGA asynchronously using a pixel buffer. Is it possible with an FBO as well?

I would like to stick to using FBOs for off screen render and I also read somewhere that switching between PBOs is expensive.

Thanks.

The time spent rendering glut*Teapot is probably mostly software overhead. If you want to get a more accurate test of HW perf, I would suggest at least compiling the teapot into a display list, and then calling the list multiple times. The glut primitives are nice stock objects, but they are not optimized for high performance.

-Evan

I just used this teapot because it was simple and easy to make my card work hard.

What I am trying to get help for is to determine the best way to render to an off screen buffer and read back its content frame by frame. FBOs are the new thing, I thought that is the way to go.

I could not get multisampling to work by myself, thankfully I already got help with that. I am still not certain if FBOs are better than the older PBOs, I read that asynchronous readback is possible with a PBO. I am not sure the same is true with an FBO.

Is there any advantage using a PBO instead of FBOs? Please take into account that I need to read back my rendered buffer every frame, and quite possibly also need to ‘upload’ several textures to VGA memory every frame (if I want a sequence of images to use as texture).

Thanks.

>>Is there any advantage using a PBO instead of FBOs?<<

Wrong question.
Note that the “buffer object” in both of these shortcuts come from a different background.

Watch the dashes:
vertex-bufferobject and pixel-bufferobject, but framebuffer-object.
There is no “instead”, these are two different kinds of extensions.

Then there is something I really don’t understand (yet).

I can generate an FBO, attach renderbuffers, textures, render everything, read back the color buffer.

What should I use a PBO for?

Thanks.

You usually use a PBO for asynchronous data transfer. These are the same sort of buffers as VBOs, so this is basically server-stored data. But with VBOs you can use this data as vertex/index arrays, while with PBOs you can use them as the source or destination for image data commands (like glTexImage2D). You can, for example, render to a texture, copy it to a PBO and then use it as a VBO, making it possible to create vertex arrays on the GPU. Or you can stream texture data to the GPU while doing something else. Read the specification…

OK, now I know the difference. A Pbuffer and a PBO are not the same thing.

I am still not sure if I understand everything correctly. Please let me know if I am wrong.

Is transferring texture data to PBOs asynchronous even on the GPU? Can I do it in a separate thread on another CPU?

Ideally I would render to a color renderbuffer of an FBO, using one set of textures, and in parallel - if needed - download texture data to another set using PBOs. This set would be used for the next frame. This would certainly help if I wanted to use image sequences or video files in the scene. But is it possible?

For transfer from the GPU I am not sure if there is a real advantage compared to a normal readpixel call. If it is only asynchronous on the CPU side, I am not sure what I would do with the time won, although my application is designed with at least two CPU cores in mind.

Thanks.

There are two advantages:

  • if using a PBO, ReadPixels will return immediately, even before the read operation finishes. So the algorithm is ReadPixels; dosomethingelse; read the buffer

  • you can read to more than one buffer at the same time. An NVIDIA paper suggested splitting the image into several tiles and using ReadPixels to upload each tile to a different buffer. You can then process each tile when it is ready. This way you save the time you would otherwise spend waiting for the whole data to be transferred to CPU memory

Keep in mind, that PBO/VBO memory transfer is multithreaded on GPU, so you can transfer the memory while GPU does something else… Well, it should be that way :slight_smile:

Thanks.

I was thinking about how to spend the dosomethingelse time.

But reading and maybe processing buffers in chunks or tiles sounds interesting and possible. As far as I could measure readpixels, it costs me at least 1.5msec of CPU time. Since I do some image processing on the buffer after it, I can divide the buffer to e.g. 8 parts and lose only the time waiting for the first part to be read back. BTW How do I know transfer is finished?

So downloading textures using a separate thread/CPU is possible even while the GPU renders to a color renderbuffer of my FBO?

Thanks.