[v2,25/37] libcamera: software_isp: debayer_egl: Add an eGL debayer class
diff mbox series

Message ID 20250824-b4-v0-5-2-gpuisp-v2-a-v2-25-96f4576c814e@linaro.org
State New
Headers show
Series
  • Add GLES 2.0 GPUISP to libcamera
Related show

Commit Message

Bryan O'Donoghue Aug. 24, 2025, 12:48 a.m. UTC
Add a class to run the existing glsl debayer shaders on a GBM surface.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>

libcamera: software_isp: debayer_egl: Extend logic to enable application of softISP RGB debayer params

The existing SoftISP calculates RGB gain values as a lookup table of 256
values which shifts for each frame depending on the required correction.

We can pass the required tables into the debayer shaders as textures, one
texture for R, G and B respectively.

The debayer shader will do its debayer interpolation and then if the
appropriate define is specified use the calculated R, G and B values as
indexes into our bayer colour gain table.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
---
 src/libcamera/software_isp/debayer_egl.cpp | 628 +++++++++++++++++++++++++++++
 src/libcamera/software_isp/debayer_egl.h   | 171 ++++++++
 src/libcamera/software_isp/meson.build     |   8 +
 3 files changed, 807 insertions(+)

Comments

Robert Mader Aug. 29, 2025, 10:34 a.m. UTC | #1
Hey,

really exciting to see this working and performing so well already!

While looking over the code I spotted a few issues - which may explain
why some people have been reporting tearing/glitches - and got some
ideas on how to further reduce overhead / improve performance, see below.

On 24.08.25 02:48, Bryan O'Donoghue wrote:
> Add a class to run the existing glsl debayer shaders on a GBM surface.
>
> Signed-off-by: Bryan O'Donoghue<bryan.odonoghue@linaro.org>
>
> libcamera: software_isp: debayer_egl: Extend logic to enable application of softISP RGB debayer params
>
> The existing SoftISP calculates RGB gain values as a lookup table of 256
> values which shifts for each frame depending on the required correction.
>
> We can pass the required tables into the debayer shaders as textures, one
> texture for R, G and B respectively.
>
> The debayer shader will do its debayer interpolation and then if the
> appropriate define is specified use the calculated R, G and B values as
> indexes into our bayer colour gain table.
>
> Signed-off-by: Bryan O'Donoghue<bryan.odonoghue@linaro.org>
> ---
>   src/libcamera/software_isp/debayer_egl.cpp | 628 +++++++++++++++++++++++++++++
>   src/libcamera/software_isp/debayer_egl.h   | 171 ++++++++
>   src/libcamera/software_isp/meson.build     |   8 +
>   3 files changed, 807 insertions(+)
>
> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
> new file mode 100644
> index 0000000000000000000000000000000000000000..3932044a231ad8348f011369396556c5ad230ff6
> --- /dev/null
> +++ b/src/libcamera/software_isp/debayer_egl.cpp
> @@ -0,0 +1,628 @@
> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> +/*
> + * Copyright (C) 2024, Linaro Ltd.
> + *
> + * Authors:
> + * Bryan O'Donoghue<bryan.odonoghue@linaro.org>
> + *
> + * debayer_cpu.cpp - EGL based debayering class
> + */
> +
> +#include <math.h>
> +#include <stdlib.h>
> +#include <time.h>
> +
> +#include <libcamera/formats.h>
> +
> +#include "libcamera/internal/glsl_shaders.h"
> +#include "debayer_egl.h"
> +
> +namespace libcamera {
> +
> +DebayerEGL::DebayerEGL(std::unique_ptr<SwStatsCpu> stats)
> +	: Debayer(), stats_(std::move(stats))
> +{
> +	eglImageBayerIn_ = eglImageBayerOut_= eglImageRedLookup_ = eglImageBlueLookup_ = eglImageGreenLookup_ = NULL;
> +}
> +
> +DebayerEGL::~DebayerEGL()
> +{
> +	if (eglImageBlueLookup_)
> +		delete eglImageBlueLookup_;
> +
> +	if (eglImageGreenLookup_)
> +		delete eglImageGreenLookup_;
> +
> +	if (eglImageRedLookup_)
> +		delete eglImageRedLookup_;
> +
> +	if (eglImageBayerOut_)
> +		delete eglImageBayerOut_;
> +
> +	if (eglImageBayerIn_)
> +		delete eglImageBayerIn_;
> +}
> +
> +int DebayerEGL::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
> +{
> +	BayerFormat bayerFormat =
> +		BayerFormat::fromPixelFormat(inputFormat);
> +
> +	if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10) &&
> +	    bayerFormat.packing == BayerFormat::Packing::None &&
> +	    isStandardBayerOrder(bayerFormat.order)) {
> +		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
> +		config.patternSize.width = 2;
> +		config.patternSize.height = 2;
> +		config.outputFormats = std::vector<PixelFormat>({ formats::XRGB8888,
> +								  formats::ARGB8888,
> +								  formats::XBGR8888,
> +								  formats::ABGR8888 });
> +		return 0;
> +	}
> +
> +	if (bayerFormat.bitDepth == 10 &&
> +	    bayerFormat.packing == BayerFormat::Packing::CSI2 &&
> +	    isStandardBayerOrder(bayerFormat.order)) {
> +		config.bpp = 10;
> +		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
> +		config.patternSize.height = 2;
> +		config.outputFormats = std::vector<PixelFormat>({ formats::XRGB8888,
> +								  formats::ARGB8888,
> +								  formats::XBGR8888,
> +								  formats::ABGR8888 });
> +		return 0;
> +	}
> +
> +	LOG(Debayer, Error)
> +		<< "Unsupported input format " << inputFormat.toString();
> +
> +	return -EINVAL;
> +}
> +
> +int DebayerEGL::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
> +{
> +	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
> +	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
> +		config.bpp = 32;
> +		return 0;
> +	}
> +
> +	LOG(Debayer, Error)
> +		<< "Unsupported output format " << outputFormat.toString();
> +
> +	return -EINVAL;
> +}
> +
> +int DebayerEGL::getShaderVariableLocations(void)
> +{
> +	attributeVertex_ = glGetAttribLocation(programId_, "vertexIn");
> +	attributeTexture_ = glGetAttribLocation(programId_, "textureIn");
> +
> +	textureUniformBayerDataIn_ = glGetUniformLocation(programId_, "tex_y");
> +	textureUniformRedLookupDataIn_ = glGetUniformLocation(programId_, "red_param");
> +	textureUniformGreenLookupDataIn_ = glGetUniformLocation(programId_, "green_param");
> +	textureUniformBlueLookupDataIn_ = glGetUniformLocation(programId_, "blue_param");
> +	ccmUniformDataIn_ = glGetUniformLocation(programId_, "ccm");
> +
> +	textureUniformStep_ = glGetUniformLocation(programId_, "tex_step");
> +	textureUniformSize_ = glGetUniformLocation(programId_, "tex_size");
> +	textureUniformStrideFactor_ = glGetUniformLocation(programId_, "stride_factor");
> +	textureUniformBayerFirstRed_ = glGetUniformLocation(programId_, "tex_bayer_first_red");
> +	textureUniformProjMatrix_ = glGetUniformLocation(programId_, "proj_matrix");
> +
> +	LOG(Debayer, Debug) << "vertexIn " << attributeVertex_ << " textureIn " << attributeTexture_
> +			    << " tex_y " << textureUniformBayerDataIn_
> +			    << " red_param " << textureUniformRedLookupDataIn_
> +			    << " green_param " << textureUniformGreenLookupDataIn_
> +			    << " blue_param " << textureUniformBlueLookupDataIn_
> +			    << " ccm " << ccmUniformDataIn_
> +			    << " tex_step " << textureUniformStep_
> +			    << " tex_size " << textureUniformSize_
> +			    << " stride_factor " << textureUniformStrideFactor_
> +			    << " tex_bayer_first_red " << textureUniformBayerFirstRed_
> +			    << " proj_matrix " << textureUniformProjMatrix_;
> +	return 0;
> +}
> +
> +int DebayerEGL::initBayerShaders(PixelFormat inputFormat, PixelFormat outputFormat)
> +{
> +	std::vector<std::string> shaderEnv;
> +	unsigned int fragmentShaderDataLen;
> +	unsigned char *fragmentShaderData;
> +	unsigned int vertexShaderDataLen;
> +	unsigned char *vertexShaderData;
> +	GLenum err;
> +
> +	// Target gles 100 glsl requires "#version x" as first directive in shader
> +	egl_.pushEnv(shaderEnv, "#version 100");
> +
> +	// Specify GL_OES_EGL_image_external
> +	egl_.pushEnv(shaderEnv, "#extension GL_OES_EGL_image_external: enable");
> +
> +	// Tell shaders how to re-order output taking account of how the
> +	// pixels are actually stored by GBM
> +	switch (outputFormat) {
> +	case formats::ARGB8888:
> +	case formats::XRGB8888:
> +		break;
> +	case formats::ABGR8888:
> +	case formats::XBGR8888:
> +		egl_.pushEnv(shaderEnv, "#define SWAP_BLUE");
> +		break;
> +	default:
> +		goto invalid_fmt;
> +	}
> +
> +	// Pixel location parameters
> +	glFormat_ = GL_LUMINANCE;
> +	bytesPerPixel_ = 1;
> +	switch (inputFormat) {
> +	case libcamera::formats::SBGGR8:
> +	case libcamera::formats::SBGGR10_CSI2P:
> +	case libcamera::formats::SBGGR12_CSI2P:
> +		firstRed_x_ = 1.0;
> +		firstRed_y_ = 1.0;
> +		break;
> +	case libcamera::formats::SGBRG8:
> +	case libcamera::formats::SGBRG10_CSI2P:
> +	case libcamera::formats::SGBRG12_CSI2P:
> +		firstRed_x_ = 0.0;
> +		firstRed_y_ = 1.0;
> +		break;
> +	case libcamera::formats::SGRBG8:
> +	case libcamera::formats::SGRBG10_CSI2P:
> +	case libcamera::formats::SGRBG12_CSI2P:
> +		firstRed_x_ = 1.0;
> +		firstRed_y_ = 0.0;
> +		break;
> +	case libcamera::formats::SRGGB8:
> +	case libcamera::formats::SRGGB10_CSI2P:
> +	case libcamera::formats::SRGGB12_CSI2P:
> +		firstRed_x_ = 0.0;
> +		firstRed_y_ = 0.0;
> +		break;
> +	default:
> +		goto invalid_fmt;
> +		break;
> +	};
> +
> +	// Shader selection
> +	switch (inputFormat) {
> +	case libcamera::formats::SBGGR8:
> +	case libcamera::formats::SGBRG8:
> +	case libcamera::formats::SGRBG8:
> +	case libcamera::formats::SRGGB8:
> +		fragmentShaderData = bayer_unpacked_frag;
> +		fragmentShaderDataLen = bayer_unpacked_frag_len;
> +		vertexShaderData = bayer_unpacked_vert;
> +		vertexShaderDataLen = bayer_unpacked_vert_len;
> +		break;
> +	case libcamera::formats::SBGGR10_CSI2P:
> +	case libcamera::formats::SGBRG10_CSI2P:
> +	case libcamera::formats::SGRBG10_CSI2P:
> +	case libcamera::formats::SRGGB10_CSI2P:
> +		egl_.pushEnv(shaderEnv, "#define RAW10P");
> +		if (BayerFormat::fromPixelFormat(inputFormat).packing == BayerFormat::Packing::None) {
> +			fragmentShaderData = bayer_unpacked_frag;
> +			fragmentShaderDataLen = bayer_unpacked_frag_len;
> +			vertexShaderData = bayer_unpacked_vert;
> +			vertexShaderDataLen = bayer_unpacked_vert_len;
> +			glFormat_ = GL_RG;
> +			bytesPerPixel_ = 2;
> +		} else {
> +			fragmentShaderData = bayer_1x_packed_frag;
> +			fragmentShaderDataLen = bayer_1x_packed_frag_len;
> +			vertexShaderData = identity_vert;
> +			vertexShaderDataLen = identity_vert_len;
> +		}
> +		break;
> +	case libcamera::formats::SBGGR12_CSI2P:
> +	case libcamera::formats::SGBRG12_CSI2P:
> +	case libcamera::formats::SGRBG12_CSI2P:
> +	case libcamera::formats::SRGGB12_CSI2P:
> +		egl_.pushEnv(shaderEnv, "#define RAW12P");
> +		if (BayerFormat::fromPixelFormat(inputFormat).packing == BayerFormat::Packing::None) {
> +			fragmentShaderData = bayer_unpacked_frag;
> +			fragmentShaderDataLen = bayer_unpacked_frag_len;
> +			vertexShaderData = bayer_unpacked_vert;
> +			vertexShaderDataLen = bayer_unpacked_vert_len;
> +			glFormat_ = GL_RG;
> +			bytesPerPixel_ = 2;
> +		} else {
> +			fragmentShaderData = bayer_1x_packed_frag;
> +			fragmentShaderDataLen = bayer_1x_packed_frag_len;
> +			vertexShaderData = identity_vert;
> +			vertexShaderDataLen = identity_vert_len;
> +		}
> +		break;
> +	default:
> +		goto invalid_fmt;
> +		break;
> +	};
> +
> +	if (ccmEnabled_) {
> +		// Run the CCM if available
> +		egl_.pushEnv(shaderEnv, "#define APPLY_CCM_PARAMETERS");
> +	} else {
> +		// Flag to shaders that we have parameter gain tables
> +		egl_.pushEnv(shaderEnv, "#define APPLY_RGB_PARAMETERS");
> +	}
> +
> +	if (egl_.compileVertexShader(vertexShaderId_, vertexShaderData, vertexShaderDataLen, shaderEnv))
> +		goto compile_fail;
> +
> +	if (egl_.compileFragmentShader(fragmentShaderId_, fragmentShaderData, fragmentShaderDataLen, shaderEnv))
> +		goto compile_fail;
> +
> +	if (egl_.linkProgram(programId_, vertexShaderId_, fragmentShaderId_))
> +		goto link_fail;
> +
> +	egl_.dumpShaderSource(vertexShaderId_);
> +	egl_.dumpShaderSource(fragmentShaderId_);
> +
> +	/* Ensure we set the programId_ */
> +	egl_.useProgram(programId_);
> +	err = glGetError();
> +	if (err != GL_NO_ERROR)
> +		goto program_fail;
> +
> +	if (getShaderVariableLocations())
> +		goto parameters_fail;
> +
> +	return 0;
> +
> +parameters_fail:
> +	LOG(Debayer, Error) << "Program parameters fail";
> +	return -ENODEV;
> +
> +program_fail:
> +	LOG(Debayer, Error) << "Use program error " << err;
> +	return -ENODEV;
> +
> +link_fail:
> +	LOG(Debayer, Error) << "Linking program fail";
> +	return -ENODEV;
> +
> +compile_fail:
> +	LOG(Debayer, Error) << "Compile debayer shaders fail";
> +	return -ENODEV;
> +
> +invalid_fmt:
> +	LOG(Debayer, Error) << "Unsupported input output format combination";
> +	return -EINVAL;
> +}
> +
> +int DebayerEGL::configure(const StreamConfiguration &inputCfg,
> +			  const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
> +			  bool ccmEnabled)
> +{
> +	GLint maxTextureImageUnits;
> +
> +	if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
> +		return -EINVAL;
> +
> +	if (stats_->configure(inputCfg) != 0)
> +		return -EINVAL;
> +
> +	const Size &stats_pattern_size = stats_->patternSize();
> +	if (inputConfig_.patternSize.width != stats_pattern_size.width ||
> +	    inputConfig_.patternSize.height != stats_pattern_size.height) {
> +		LOG(Debayer, Error)
> +			<< "mismatching stats and debayer pattern sizes for "
> +			<< inputCfg.pixelFormat.toString();
> +		return -EINVAL;
> +	}
> +
> +	inputConfig_.stride = inputCfg.stride;
> +	width_ = inputCfg.size.width;
> +	height_ = inputCfg.size.height;
> +	ccmEnabled_ = ccmEnabled;
> +
> +	if (outputCfgs.size() != 1) {
> +		LOG(Debayer, Error)
> +			<< "Unsupported number of output streams: "
> +			<< outputCfgs.size();
> +		return -EINVAL;
> +	}
> +
> +	if (gbmSurface_.createDevice())
> +		return -ENODEV;
> +
> +	if (egl_.initEGLContext(&gbmSurface_))
> +		return -ENODEV;
> +
> +	glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &maxTextureImageUnits);
> +
> +	LOG(Debayer, Debug) << "Available fragment shader texture units " << maxTextureImageUnits;
> +
> +	if (!ccmEnabled && maxTextureImageUnits < DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS) {
> +		LOG(Debayer, Error) << "Fragment shader texture unit count " << maxTextureImageUnits
> +				    << " required minimum for RGB gain table lookup " << DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS
> +				    << " try using an identity CCM ";
> +		return -ENODEV;
> +	}
> +
> +	StreamConfiguration &outputCfg = outputCfgs[0];
> +	SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
> +	std::tie(outputConfig_.stride, outputConfig_.frameSize) =
> +		strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
> +
> +	if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
> +		LOG(Debayer, Error)
> +			<< "Invalid output size/stride: "
> +			<< "\n  " << outputCfg.size << " (" << outSizeRange << ")"
> +			<< "\n  " << outputCfg.stride << " (" << outputConfig_.stride << ")";
> +		return -EINVAL;
> +	}
> +
> +	window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
> +		    ~(inputConfig_.patternSize.width - 1);
> +	window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
> +		    ~(inputConfig_.patternSize.height - 1);
> +	window_.width = outputCfg.size.width;
> +	window_.height = outputCfg.size.height;
> +
> +	/*
> +	 * Don't pass x,y from window_ since process() already adjusts for it.
> +	 * But crop the window to 2/3 of its width and height for speedup.
> +	 */
> +	stats_->setWindow((window_.size() * 2 / 3).centeredTo(window_.center()));
> +
> +	// Raw bayer input as texture
> +	eglImageBayerIn_ = new eGLImage(width_, height_, 32, GL_TEXTURE0, 0);
> +	if (!eglImageBayerIn_)
> +		return -ENOMEM;
> +
> +	// Only do the RGB lookup table textures if CCM is disabled
> +	if (!ccmEnabled_) {
> +
> +		/// RGB correction tables as 2d textures
> +		// eGL doesn't support glTexImage1D so we do a little hack with 2D to compensate
> +		eglImageRedLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE1, 1);
> +		if (!eglImageRedLookup_)
> +			return -ENOMEM;
> +
> +		eglImageGreenLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE2, 2);
> +		if (!eglImageGreenLookup_)
> +			return -ENOMEM;
> +
> +		eglImageBlueLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE3, 3);
> +		if (!eglImageBlueLookup_)
> +			return -ENOMEM;
> +	}
> +
> +	eglImageBayerOut_ = new eGLImage(outputCfg.size.width, outputCfg.size.height, 32, outputCfg.stride, GL_TEXTURE4, 4);
> +	if (!eglImageBayerOut_)
> +		return -ENOMEM;
> +
> +	if (initBayerShaders(inputCfg.pixelFormat, outputCfg.pixelFormat))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +Size DebayerEGL::patternSize(PixelFormat inputFormat)
> +{
> +	DebayerEGL::DebayerInputConfig config;
> +
> +	if (getInputConfig(inputFormat, config) != 0)
> +		return {};
> +
> +	return config.patternSize;
> +}
> +
> +std::vector<PixelFormat> DebayerEGL::formats(PixelFormat inputFormat)
> +{
> +	DebayerEGL::DebayerInputConfig config;
> +
> +	if (getInputConfig(inputFormat, config) != 0)
> +		return std::vector<PixelFormat>();
> +
> +	return config.outputFormats;
> +}
> +
> +std::tuple<unsigned int, unsigned int>
> +DebayerEGL::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
> +{
> +	DebayerEGL::DebayerOutputConfig config;
> +
> +	if (getOutputConfig(outputFormat, config) != 0)
> +		return std::make_tuple(0, 0);
> +
> +	/* Align stride to 256 bytes as a generic GPU memory access alignment */
> +	unsigned int stride = ALIGN_TO(size.width * config.bpp / 8, 256);
> +
> +	return std::make_tuple(stride, stride * size.height);
> +}
> +
> +void DebayerEGL::setShaderVariableValues(void)
> +{
> +	/*
> +	 * Raw Bayer 8-bit, and packed raw Bayer 10-bit/12-bit formats
> +	 * are stored in a GL_LUMINANCE texture. The texture width is
> +	 * equal to the stride.
> +	 */
> +	GLfloat firstRed[] = { firstRed_x_, firstRed_y_ };
> +	GLfloat imgSize[] = { (GLfloat)width_,
> +			      (GLfloat)height_ };
> +	GLfloat Step[] = { static_cast<float>(bytesPerPixel_) / (inputConfig_.stride - 1),
> +			   1.0f / (height_ - 1) };
> +	GLfloat Stride = 1.0f;
> +	GLfloat projIdentityMatrix[] = {
> +		1, 0, 0, 0,
> +		0, 1, 0, 0,
> +		0, 0, 1, 0,
> +		0, 0, 0, 1
> +	};
> +
> +	// vertexIn - bayer_8.vert
> +	glEnableVertexAttribArray(attributeVertex_);
> +	glVertexAttribPointer(attributeVertex_, 2, GL_FLOAT, GL_TRUE,
> +			      2 * sizeof(GLfloat), vcoordinates);
> +
> +	// textureIn - bayer_8.vert
> +	glEnableVertexAttribArray(attributeTexture_);
> +	glVertexAttribPointer(attributeTexture_, 2, GL_FLOAT, GL_TRUE,
> +			      2 * sizeof(GLfloat), tcoordinates);
> +
> +	// Set the sampler2D to the respective texture unit for each texutre
> +	// To simultaneously sample multiple textures we need to use multiple
> +	// texture units
> +	glUniform1i(textureUniformBayerDataIn_, eglImageBayerIn_->texture_unit_uniform_id_);
> +	if (!ccmEnabled_) {
> +		glUniform1i(textureUniformRedLookupDataIn_, eglImageRedLookup_->texture_unit_uniform_id_);
> +		glUniform1i(textureUniformGreenLookupDataIn_, eglImageGreenLookup_->texture_unit_uniform_id_);
> +		glUniform1i(textureUniformBlueLookupDataIn_, eglImageBlueLookup_->texture_unit_uniform_id_);
> +	}
> +
> +	// These values are:
> +	// firstRed = tex_bayer_first_red - bayer_8.vert
> +	// imgSize = tex_size - bayer_8.vert
> +	// step = tex_step - bayer_8.vert
> +	// Stride = stride_factor identity.vert
> +	// textureUniformProjMatri = No scaling
> +	glUniform2fv(textureUniformBayerFirstRed_, 1, firstRed);
> +	glUniform2fv(textureUniformSize_, 1, imgSize);
> +	glUniform2fv(textureUniformStep_, 1, Step);
> +	glUniform1f(textureUniformStrideFactor_, Stride);
> +	glUniformMatrix4fv(textureUniformProjMatrix_, 1,
> +			   GL_FALSE, projIdentityMatrix);
> +
> +	LOG(Debayer, Debug) << "vertexIn " << attributeVertex_ << " textureIn " << attributeTexture_
> +			    << " tex_y " << textureUniformBayerDataIn_
> +			    << " red_param " << textureUniformRedLookupDataIn_
> +			    << " green_param " << textureUniformGreenLookupDataIn_
> +			    << " blue_param " << textureUniformBlueLookupDataIn_
> +			    << " tex_step " << textureUniformStep_
> +			    << " tex_size " << textureUniformSize_
> +			    << " stride_factor " << textureUniformStrideFactor_
> +			    << " tex_bayer_first_red " << textureUniformBayerFirstRed_;
> +
> +	LOG (Debayer, Debug) << "textureUniformY_ = 0 "
> +			     <<	" firstRed.x " << firstRed[0]
> +			     <<	" firstRed.y " << firstRed[1]
> +			     <<	" textureUniformSize_.width " << imgSize[0]
> +			     <<	" textureUniformSize_.height " << imgSize[1]
> +			     <<	" textureUniformStep_.x " << Step[0]
> +			     <<	" textureUniformStep_.y " << Step[1]
> +			     <<	" textureUniformStrideFactor_ " << Stride
> +			     <<	" textureUniformProjMatrix_ " << textureUniformProjMatrix_;
> +	return;
> +}
> +
> +void DebayerEGL::debayerGPU(MappedFrameBuffer &in, MappedFrameBuffer &out, DebayerParams &params)
> +{
> +	/* eGL context switch */
> +	egl_.makeCurrent();
> +
> +	/* Greate a standard texture input */
> +	egl_.createTexture2D(eglImageBayerIn_, glFormat_, inputConfig_.stride / bytesPerPixel_, height_, in.planes()[0].data());

Have you already tried whether you can use eglCreateImageKHR() here as well?
This texture creation is a bit unfortunate because it forces us to:

 1. sync the input/v4l2 dmabuf to CPU
 2. map it
 3. copy/"upload" the buffer into a newly allocated texture

while directly importing the buffer instead, like for the output buffer
below, wouldn't need any of that.

As dmabuf import is not guaranteed to succeed, we'd still need
createTexture2D as a fallback. However, AFAICS createInputDMABufTexture2D()
could be altered to use the same parameters - plus the fd, minus the
map, and converting the GL format to the matching DRM one - and create an
equal/similar input texture without a copy.

> +
> +	/* Generate the output render framebuffer as render to texture */
> +	egl_.createOutputDMABufTexture2D(eglImageBayerOut_, out.getPlaneFD(0));
> +
> +	/* Select the method we will use for bayer params CCM or params table */
> +	if (ccmEnabled_) {
> +		GLfloat ccm[9] = {
> +			params.ccm[0][0], params.ccm[0][1], params.ccm[0][2],
> +			params.ccm[1][0], params.ccm[1][1], params.ccm[1][2],
> +			params.ccm[2][0], params.ccm[2][1], params.ccm[2][2],
> +		};
> +		glUniformMatrix3fv(ccmUniformDataIn_, 1, GL_FALSE, ccm);
> +	} else {
> +		egl_.createTexture2D(eglImageRedLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.red);
> +		egl_.createTexture2D(eglImageGreenLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.green);
> +		egl_.createTexture2D(eglImageBlueLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.blue);
> +	}
> +
> +	setShaderVariableValues();
> +	glViewport(0, 0, width_, height_);
> +	glClear(GL_COLOR_BUFFER_BIT);
> +	glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS);
> +
> +	GLenum err = glGetError();
> +	if (err != GL_NO_ERROR) {
> +		LOG(eGL, Error) << "Drawing scene fail " << err;
> +	} else {
> +		egl_.syncOutput();
> +	}
> +
> +	/* Teardown the output texture */
> +	egl_.destroyDMABufTexture(eglImageBayerOut_);
Probably a bit early, however: it would be nice to reuse the textures at
some point. That's what Wayland compositors do with dmabufs from clients,
because image creation has quite a bit of overhead and can be avoided if
just the buffer content changes (we would still need one texture per buffer
- i.e. 7/8? in total for the input and output pools).
> +}
> +
> +void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params)
> +{
> +	bench_.startFrame();
> +
> +	std::vector<DmaSyncer> dmaSyncers;
> +
> +	dmaSyncBegin(dmaSyncers, input, output);

This is wrong now, at least with regard to the output buffer. Note that
the sync is only needed for CPU access - so in the future, when
debayerGPU() will hopefully only use the GPU while the CPU is limited to
stats_->processFrame(), we can do something like this:

diff --git a/src/libcamera/software_isp/debayer.cpp b/src/libcamera/software_isp/debayer.cpp
index c16ce44b1..d277d3b6a 100644
--- a/src/libcamera/software_isp/debayer.cpp
+++ b/src/libcamera/software_isp/debayer.cpp
@@ -223,8 +223,10 @@ void Debayer::dmaSyncBegin(std::vector<DmaSyncer> &dmaSyncers, FrameBuffer *inpu
         for (const FrameBuffer::Plane &plane : input->planes())
                 dmaSyncers.emplace_back(plane.fd, DmaSyncer::SyncType::Read);
 
-        for (const FrameBuffer::Plane &plane : output->planes())
-                dmaSyncers.emplace_back(plane.fd, DmaSyncer::SyncType::Write);
+        if (output) {
+                for (const FrameBuffer::Plane &plane : output->planes())
+                        dmaSyncers.emplace_back(plane.fd, DmaSyncer::SyncType::Write);
+        }
 }
 
 /**
diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
index 3932044a2..ded2d27f7 100644
--- a/src/libcamera/software_isp/debayer_egl.cpp
+++ b/src/libcamera/software_isp/debayer_egl.cpp
@@ -558,8 +558,6 @@ void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
 
         std::vector<DmaSyncer> dmaSyncers;
 
-        dmaSyncBegin(dmaSyncers, input, output);
-
         setParams(params);
 
         /* Copy metadata from the input buffer */
@@ -578,14 +576,14 @@ void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
 
         debayerGPU(in, out, params);
 
-        dmaSyncers.clear();
-
         bench_.finishFrame();
 
         metadata.planes()[0].bytesused = out.planes()[0].size();
 
         /* Calculate stats for the whole frame */
+        dmaSyncBegin(dmaSyncers, input, nullptr);
         stats_->processFrame(frame, 0, input);
+        dmaSyncers.clear();
 
         outputBufferReady.emit(output);
         inputBufferReady.emit(input);

However as long as the input buffer doesn't get directly imported we 
should at least limit the sync to the input buffer, i.e. do something like:

diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
index 3932044a2..9f251720e 100644
--- a/src/libcamera/software_isp/debayer_egl.cpp
+++ b/src/libcamera/software_isp/debayer_egl.cpp
@@ -558,7 +558,7 @@ void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output

         std::vector<DmaSyncer> dmaSyncers;

-       dmaSyncBegin(dmaSyncers, input, output);
+       dmaSyncBegin(dmaSyncers, input, nullptr);

         setParams(params);

@@ -578,14 +578,13 @@ void DebayerEGL::process(uint32_t frame, 
FrameBuffer *input, FrameBuffer *output

         debayerGPU(in, out, params);

-       dmaSyncers.clear();
-
         bench_.finishFrame();

         metadata.planes()[0].bytesused = out.planes()[0].size();

         /* Calculate stats for the whole frame */
         stats_->processFrame(frame, 0, input);
+       dmaSyncers.clear();

         outputBufferReady.emit(output);
         inputBufferReady.emit(input);

I'm not quite sure what the write-sync on the output buffer currently 
does on affected platforms (mainly aarch64), however IIUC it 
could/should cause a race between the GPU writing to the buffer and the 
CPU flushing the corresponding caches, potentially overwriting the new 
data from the GPU with old data from the CPU, explaining glitches people 
have been reporting.

> +
> +	setParams(params);
> +
> +	/* Copy metadata from the input buffer */
> +	FrameMetadata &metadata = output->_d()->metadata();
> +	metadata.status = input->metadata().status;
> +	metadata.sequence = input->metadata().sequence;
> +	metadata.timestamp = input->metadata().timestamp;
> +
> +	MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
> +	MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);

I wonder whether we can improve how we mmap the buffers here. For the 
output buffer we should already be able to just use the FrameBuffer, 
while the input buffer will get mapped in stats_->processFrame() 
eventually. The kernel's page cache should avoid most of the overhead; 
however, calling dma_buf_mmap() is AFAIK non-negligible, and once we 
actually do read we always trigger page faults on the first read after 
mapping, if I'm not mistaken.

Judging from GStreamer and how GST_FD_MEMORY_FLAG_KEEP_MAPPED is used in 
various cases, it'll probably be best to never map the output buffers 
while always keeping the input buffers read-mapped. In the case of dmabufs 
that shouldn't cause additional sync work or copies (as that's handled 
by DmaSyncer/DMA_BUF_IOCTL_SYNC), while reducing overhead - mainly 
syscalls - quite a bit. Maybe we can add some wrapper class for 
FrameBuffer, similar to MappedFrameBuffer, holding the cached EGL images 
and mappings as required.

That's all for now. Best regards,

Robert

> +	if (!in.isValid() || !out.isValid()) {
> +		LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
> +		metadata.status = FrameMetadata::FrameError;
> +		return;
> +	}
> +
> +	debayerGPU(in, out, params);
> +
> +	dmaSyncers.clear();
> +
> +	bench_.finishFrame();
> +
> +	metadata.planes()[0].bytesused = out.planes()[0].size();
> +
> +	/* Calculate stats for the whole frame */
> +	stats_->processFrame(frame, 0, input);
> +
> +	outputBufferReady.emit(output);
> +	inputBufferReady.emit(input);
> +}
> +
> +void DebayerEGL::stop()
> +{
> +	egl_.cleanUp();
> +}
> +
> +SizeRange DebayerEGL::sizes(PixelFormat inputFormat, const Size &inputSize)
> +{
> +	Size patternSize = this->patternSize(inputFormat);
> +	unsigned int borderHeight = patternSize.height;
> +
> +	if (patternSize.isNull())
> +		return {};
> +
> +	/* No need for top/bottom border with a pattern height of 2 */
> +	if (patternSize.height == 2)
> +		borderHeight = 0;
> +
> +	/*
> +	 * For debayer interpolation a border is kept around the entire image
> +	 * and the minimum output size is pattern-height x pattern-width.
> +	 */
> +	if (inputSize.width < (3 * patternSize.width) ||
> +	    inputSize.height < (2 * borderHeight + patternSize.height)) {
> +		LOG(Debayer, Warning)
> +			<< "Input format size too small: " << inputSize.toString();
> +		return {};
> +	}
> +
> +	return SizeRange(Size(patternSize.width, patternSize.height),
> +			 Size((inputSize.width - 2 * patternSize.width) & ~(patternSize.width - 1),
> +			      (inputSize.height - 2 * borderHeight) & ~(patternSize.height - 1)),
> +			 patternSize.width, patternSize.height);
> +}
> +
> +} /* namespace libcamera */
> diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..ecb22fcb7f3a7d74c3a605a5351ea5871df24f5d
> --- /dev/null
> +++ b/src/libcamera/software_isp/debayer_egl.h
> @@ -0,0 +1,171 @@
> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> +/*
> + * Copyright (C) 2025, Bryan O'Donoghue.
> + *
> + * Authors:
> + * Bryan O'Donoghue<bryan.odonoghue@linaro.org>
> + *
> + * debayer_opengl.h - EGL debayer header
> + */
> +
> +#pragma once
> +
> +#include <memory>
> +#include <stdint.h>
> +#include <vector>
> +
> +#define GL_GLEXT_PROTOTYPES
> +#define EGL_EGLEXT_PROTOTYPES
> +#include <EGL/egl.h>
> +#include <EGL/eglext.h>
> +#include <GLES3/gl32.h>
> +
> +#include <libcamera/base/object.h>
> +
> +#include "debayer.h"
> +
> +#include "libcamera/internal/bayer_format.h"
> +#include "libcamera/internal/egl.h"
> +#include "libcamera/internal/framebuffer.h"
> +#include "libcamera/internal/mapped_framebuffer.h"
> +#include "libcamera/internal/software_isp/benchmark.h"
> +#include "libcamera/internal/software_isp/swstats_cpu.h"
> +
> +namespace libcamera {
> +
> +#define DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS 4
> +#define DEBAYER_OPENGL_COORDS 4
> +
> +/**
> + * \class DebayerEGL
> + * \brief Class for debayering using an EGL Shader
> + *
> + * Implements an EGL shader based debayering solution.
> + */
> +class DebayerEGL : public Debayer
> +{
> +public:
> +	/**
> +	 * \brief Constructs a DebayerEGL object.
> +	 * \param[in] stats Pointer to the stats object to use.
> +	 */
> +	DebayerEGL(std::unique_ptr<SwStatsCpu> stats);
> +	~DebayerEGL();
> +
> +	/*
> +	 * Setup the Debayer object according to the passed in parameters.
> +	 * Return 0 on success, a negative errno value on failure
> +	 * (unsupported parameters).
> +	 */
> +	int configure(const StreamConfiguration &inputCfg,
> +		      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
> +		      bool ccmEnabled);
> +
> +	/*
> +	 * Get width and height at which the bayer-pattern repeats.
> +	 * Return pattern-size or an empty Size for an unsupported inputFormat.
> +	 */
> +	Size patternSize(PixelFormat inputFormat);
> +
> +	std::vector<PixelFormat> formats(PixelFormat input);
> +	std::tuple<unsigned int, unsigned int> strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
> +
> +	void process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params);
> +	void stop();
> +
> +	/**
> +	 * \brief Get the file descriptor for the statistics.
> +	 *
> +	 * \return the file descriptor pointing to the statistics.
> +	 */
> +	const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
> +
> +	/**
> +	 * \brief Get the output frame size.
> +	 *
> +	 * \return The output frame size.
> +	 */
> +	unsigned int frameSize() { return outputConfig_.frameSize; }
> +
> +	SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
> +
> +private:
> +	static int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
> +	static int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
> +	int setupStandardBayerOrder(BayerFormat::Order order);
> +	void pushEnv(std::vector<std::string> &shaderEnv, const char *str);
> +	int initBayerShaders(PixelFormat inputFormat, PixelFormat outputFormat);
> +	int initEGLContext();
> +	int generateTextures();
> +	int compileShaderProgram(GLuint &shaderId, GLenum shaderType,
> +				 unsigned char *shaderData, int shaderDataLen,
> +				 std::vector<std::string> shaderEnv);
> +	int linkShaderProgram(void);
> +	int getShaderVariableLocations();
> +	void setShaderVariableValues(void);
> +	void configureTexture(GLuint &texture);
> +	void debayerGPU(MappedFrameBuffer &in, MappedFrameBuffer &out, DebayerParams &params);
> +
> +	// Shader program identifiers
> +	GLuint vertexShaderId_;
> +	GLuint fragmentShaderId_;
> +	GLuint programId_;
> +	enum {
> +		BAYER_INPUT_INDEX = 0,
> +		BAYER_OUTPUT_INDEX,
> +		BAYER_BUF_NUM,
> +	};
> +
> +	// Pointer to object representing input texture
> +	eGLImage *eglImageBayerIn_;
> +	eGLImage *eglImageBayerOut_;
> +
> +	eGLImage *eglImageRedLookup_;
> +	eGLImage *eglImageGreenLookup_;
> +	eGLImage *eglImageBlueLookup_;
> +
> +	// Shader parameters
> +	float firstRed_x_;
> +	float firstRed_y_;
> +	GLint attributeVertex_;
> +	GLint attributeTexture_;
> +	GLint textureUniformStep_;
> +	GLint textureUniformSize_;
> +	GLint textureUniformStrideFactor_;
> +	GLint textureUniformBayerFirstRed_;
> +	GLint textureUniformProjMatrix_;
> +
> +	GLint textureUniformBayerDataIn_;
> +
> +	// These textures will either point to simple RGB gains or to CCM lookup tables
> +	GLint textureUniformRedLookupDataIn_;
> +	GLint textureUniformGreenLookupDataIn_;
> +	GLint textureUniformBlueLookupDataIn_;
> +
> +	// Represent per-frame CCM as a uniform vector of floats 3 x 3
> +	GLint ccmUniformDataIn_;
> +	bool ccmEnabled_;
> +
> +	Rectangle window_;
> +	std::unique_ptr<SwStatsCpu> stats_;
> +	eGL egl_;
> +	GBM gbmSurface_;
> +	uint32_t width_;
> +	uint32_t height_;
> +	GLint glFormat_;
> +	unsigned int bytesPerPixel_;
> +	GLfloat vcoordinates[DEBAYER_OPENGL_COORDS][2] = {
> +		{ -1.0f, -1.0f },
> +		{ -1.0f, +1.0f },
> +		{ +1.0f, +1.0f },
> +		{ +1.0f, -1.0f },
> +	};
> +	GLfloat tcoordinates[DEBAYER_OPENGL_COORDS][2] = {
> +		{ 0.0f, 0.0f },
> +		{ 0.0f, 1.0f },
> +		{ 1.0f, 1.0f },
> +		{ 1.0f, 0.0f },
> +	};
> +};
> +
> +} /* namespace libcamera */
> diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
> index 59fa5f02a0a5620fa524d8a171332f04e0f769b2..c61ac7d59d37c5ef49ac67fe74cbcda3d89c30cb 100644
> --- a/src/libcamera/software_isp/meson.build
> +++ b/src/libcamera/software_isp/meson.build
> @@ -2,6 +2,7 @@
>   
>   softisp_enabled = pipelines.contains('simple')
>   summary({'SoftISP support' : softisp_enabled}, section : 'Configuration')
> +summary({'SoftISP GPU acceleration' : gles_headless_enabled}, section : 'Configuration')
>   
>   if not softisp_enabled
>       subdir_done()
> @@ -14,3 +15,10 @@ libcamera_internal_sources += files([
>       'software_isp.cpp',
>       'swstats_cpu.cpp',
>   ])
> +
> +if softisp_enabled and gles_headless_enabled
> +    config_h.set('HAVE_DEBAYER_EGL', 1)
> +    libcamera_internal_sources += files([
> +        'debayer_egl.cpp',
> +    ])
> +endif
>
Milan Zamazal Aug. 29, 2025, 5:30 p.m. UTC | #2
Hi Robert,

thank you for your analysis.  Just my bit of response:

Robert Mader <robert.mader@collabora.com> writes:

> Hey,
>
> really exciting to see this working and performing so well already!
>
> While looking over the code I spotted a few issues - which may explain 
> why some people have been reporting seeing tearing/glitches - and got 
> some ideas how to further reduce overhead / improve performance, see below.
>
> On 24.08.25 02:48, Bryan O'Donoghue wrote:
>> Add a class to run the existing glsl debayer shaders on a GBM surface.
>>
>> Signed-off-by: Bryan O'Donoghue<bryan.odonoghue@linaro.org>
>>
>> libcamera: software_isp: debayer_egl: Extend logic to enable application of softISP RGB debayer params
>>
>> The existing SoftISP calculates RGB gain values as a lookup table of 256
>> values which shifts for each frame depending on the required correction.
>>
>> We can pass the required tables into the debayer shaders as textures, one
>> texture for R, G and B respectively.
>>
>> The debayer shader will do its debayer interpolation and then if the
>> appropriate define is specified use the calculated R, G and B values as
>> indexes into our bayer colour gain table.
>>
>> Signed-off-by: Bryan O'Donoghue<bryan.odonoghue@linaro.org>
>> ---
>>   src/libcamera/software_isp/debayer_egl.cpp | 628 +++++++++++++++++++++++++++++
>>   src/libcamera/software_isp/debayer_egl.h   | 171 ++++++++
>>   src/libcamera/software_isp/meson.build     |   8 +
>>   3 files changed, 807 insertions(+)
>>
>> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..3932044a231ad8348f011369396556c5ad230ff6
>> --- /dev/null
>> +++ b/src/libcamera/software_isp/debayer_egl.cpp
>> @@ -0,0 +1,628 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2024, Linaro Ltd.
>> + *
>> + * Authors:
>> + * Bryan O'Donoghue<bryan.odonoghue@linaro.org>
>> + *
>> + * debayer_cpu.cpp - EGL based debayering class
>> + */
>> +
>> +#include <math.h>
>> +#include <stdlib.h>
>> +#include <time.h>
>> +
>> +#include <libcamera/formats.h>
>> +
>> +#include "libcamera/internal/glsl_shaders.h"
>> +#include "debayer_egl.h"
>> +
>> +namespace libcamera {
>> +
>> +DebayerEGL::DebayerEGL(std::unique_ptr<SwStatsCpu> stats)
>> +	: Debayer(), stats_(std::move(stats))
>> +{
>> + eglImageBayerIn_ = eglImageBayerOut_= eglImageRedLookup_ = eglImageBlueLookup_ = eglImageGreenLookup_
>> = NULL;
>> +}
>> +
>> +DebayerEGL::~DebayerEGL()
>> +{
>> +	if (eglImageBlueLookup_)
>> +		delete eglImageBlueLookup_;
>> +
>> +	if (eglImageGreenLookup_)
>> +		delete eglImageGreenLookup_;
>> +
>> +	if (eglImageRedLookup_)
>> +		delete eglImageRedLookup_;
>> +
>> +	if (eglImageBayerOut_)
>> +		delete eglImageBayerOut_;
>> +
>> +	if (eglImageBayerIn_)
>> +		delete eglImageBayerIn_;
>> +}
>> +
>> +int DebayerEGL::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
>> +{
>> +	BayerFormat bayerFormat =
>> +		BayerFormat::fromPixelFormat(inputFormat);
>> +
>> +	if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10) &&
>> +	    bayerFormat.packing == BayerFormat::Packing::None &&
>> +	    isStandardBayerOrder(bayerFormat.order)) {
>> +		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
>> +		config.patternSize.width = 2;
>> +		config.patternSize.height = 2;
>> +		config.outputFormats = std::vector<PixelFormat>({ formats::XRGB8888,
>> +								  formats::ARGB8888,
>> +								  formats::XBGR8888,
>> +								  formats::ABGR8888 });
>> +		return 0;
>> +	}
>> +
>> +	if (bayerFormat.bitDepth == 10 &&
>> +	    bayerFormat.packing == BayerFormat::Packing::CSI2 &&
>> +	    isStandardBayerOrder(bayerFormat.order)) {
>> +		config.bpp = 10;
>> +		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
>> +		config.patternSize.height = 2;
>> +		config.outputFormats = std::vector<PixelFormat>({ formats::XRGB8888,
>> +								  formats::ARGB8888,
>> +								  formats::XBGR8888,
>> +								  formats::ABGR8888 });
>> +		return 0;
>> +	}
>> +
>> +	LOG(Debayer, Error)
>> +		<< "Unsupported input format " << inputFormat.toString();
>> +
>> +	return -EINVAL;
>> +}
>> +
>> +int DebayerEGL::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
>> +{
>> +	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
>> +	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
>> +		config.bpp = 32;
>> +		return 0;
>> +	}
>> +
>> +	LOG(Debayer, Error)
>> +		<< "Unsupported output format " << outputFormat.toString();
>> +
>> +	return -EINVAL;
>> +}
>> +
>> +int DebayerEGL::getShaderVariableLocations(void)
>> +{
>> +	attributeVertex_ = glGetAttribLocation(programId_, "vertexIn");
>> +	attributeTexture_ = glGetAttribLocation(programId_, "textureIn");
>> +
>> +	textureUniformBayerDataIn_ = glGetUniformLocation(programId_, "tex_y");
>> +	textureUniformRedLookupDataIn_ = glGetUniformLocation(programId_, "red_param");
>> +	textureUniformGreenLookupDataIn_ = glGetUniformLocation(programId_, "green_param");
>> +	textureUniformBlueLookupDataIn_ = glGetUniformLocation(programId_, "blue_param");
>> +	ccmUniformDataIn_ = glGetUniformLocation(programId_, "ccm");
>> +
>> +	textureUniformStep_ = glGetUniformLocation(programId_, "tex_step");
>> +	textureUniformSize_ = glGetUniformLocation(programId_, "tex_size");
>> +	textureUniformStrideFactor_ = glGetUniformLocation(programId_, "stride_factor");
>> +	textureUniformBayerFirstRed_ = glGetUniformLocation(programId_, "tex_bayer_first_red");
>> +	textureUniformProjMatrix_ = glGetUniformLocation(programId_, "proj_matrix");
>> +
>> +	LOG(Debayer, Debug) << "vertexIn " << attributeVertex_ << " textureIn " << attributeTexture_
>> +			    << " tex_y " << textureUniformBayerDataIn_
>> +			    << " red_param " << textureUniformRedLookupDataIn_
>> +			    << " green_param " << textureUniformGreenLookupDataIn_
>> +			    << " blue_param " << textureUniformBlueLookupDataIn_
>> +			    << " ccm " << ccmUniformDataIn_
>> +			    << " tex_step " << textureUniformStep_
>> +			    << " tex_size " << textureUniformSize_
>> +			    << " stride_factor " << textureUniformStrideFactor_
>> +			    << " tex_bayer_first_red " << textureUniformBayerFirstRed_
>> +			    << " proj_matrix " << textureUniformProjMatrix_;
>> +	return 0;
>> +}
>> +
>> +int DebayerEGL::initBayerShaders(PixelFormat inputFormat, PixelFormat outputFormat)
>> +{
>> +	std::vector<std::string> shaderEnv;
>> +	unsigned int fragmentShaderDataLen;
>> +	unsigned char *fragmentShaderData;
>> +	unsigned int vertexShaderDataLen;
>> +	unsigned char *vertexShaderData;
>> +	GLenum err;
>> +
>> +	// Target gles 100 glsl requires "#version x" as first directive in shader
>> +	egl_.pushEnv(shaderEnv, "#version 100");
>> +
>> +	// Specify GL_OES_EGL_image_external
>> +	egl_.pushEnv(shaderEnv, "#extension GL_OES_EGL_image_external: enable");
>> +
>> +	// Tell shaders how to re-order output taking account of how the
>> +	// pixels are actually stored by GBM
>> +	switch (outputFormat) {
>> +	case formats::ARGB8888:
>> +	case formats::XRGB8888:
>> +		break;
>> +	case formats::ABGR8888:
>> +	case formats::XBGR8888:
>> +		egl_.pushEnv(shaderEnv, "#define SWAP_BLUE");
>> +		break;
>> +	default:
>> +		goto invalid_fmt;
>> +	}
>> +
>> +	// Pixel location parameters
>> +	glFormat_ = GL_LUMINANCE;
>> +	bytesPerPixel_ = 1;
>> +	switch (inputFormat) {
>> +	case libcamera::formats::SBGGR8:
>> +	case libcamera::formats::SBGGR10_CSI2P:
>> +	case libcamera::formats::SBGGR12_CSI2P:
>> +		firstRed_x_ = 1.0;
>> +		firstRed_y_ = 1.0;
>> +		break;
>> +	case libcamera::formats::SGBRG8:
>> +	case libcamera::formats::SGBRG10_CSI2P:
>> +	case libcamera::formats::SGBRG12_CSI2P:
>> +		firstRed_x_ = 0.0;
>> +		firstRed_y_ = 1.0;
>> +		break;
>> +	case libcamera::formats::SGRBG8:
>> +	case libcamera::formats::SGRBG10_CSI2P:
>> +	case libcamera::formats::SGRBG12_CSI2P:
>> +		firstRed_x_ = 1.0;
>> +		firstRed_y_ = 0.0;
>> +		break;
>> +	case libcamera::formats::SRGGB8:
>> +	case libcamera::formats::SRGGB10_CSI2P:
>> +	case libcamera::formats::SRGGB12_CSI2P:
>> +		firstRed_x_ = 0.0;
>> +		firstRed_y_ = 0.0;
>> +		break;
>> +	default:
>> +		goto invalid_fmt;
>> +		break;
>> +	};
>> +
>> +	// Shader selection
>> +	switch (inputFormat) {
>> +	case libcamera::formats::SBGGR8:
>> +	case libcamera::formats::SGBRG8:
>> +	case libcamera::formats::SGRBG8:
>> +	case libcamera::formats::SRGGB8:
>> +		fragmentShaderData = bayer_unpacked_frag;
>> +		fragmentShaderDataLen = bayer_unpacked_frag_len;
>> +		vertexShaderData = bayer_unpacked_vert;
>> +		vertexShaderDataLen = bayer_unpacked_vert_len;
>> +		break;
>> +	case libcamera::formats::SBGGR10_CSI2P:
>> +	case libcamera::formats::SGBRG10_CSI2P:
>> +	case libcamera::formats::SGRBG10_CSI2P:
>> +	case libcamera::formats::SRGGB10_CSI2P:
>> +		egl_.pushEnv(shaderEnv, "#define RAW10P");
>> +		if (BayerFormat::fromPixelFormat(inputFormat).packing == BayerFormat::Packing::None) {
>> +			fragmentShaderData = bayer_unpacked_frag;
>> +			fragmentShaderDataLen = bayer_unpacked_frag_len;
>> +			vertexShaderData = bayer_unpacked_vert;
>> +			vertexShaderDataLen = bayer_unpacked_vert_len;
>> +			glFormat_ = GL_RG;
>> +			bytesPerPixel_ = 2;
>> +		} else {
>> +			fragmentShaderData = bayer_1x_packed_frag;
>> +			fragmentShaderDataLen = bayer_1x_packed_frag_len;
>> +			vertexShaderData = identity_vert;
>> +			vertexShaderDataLen = identity_vert_len;
>> +		}
>> +		break;
>> +	case libcamera::formats::SBGGR12_CSI2P:
>> +	case libcamera::formats::SGBRG12_CSI2P:
>> +	case libcamera::formats::SGRBG12_CSI2P:
>> +	case libcamera::formats::SRGGB12_CSI2P:
>> +		egl_.pushEnv(shaderEnv, "#define RAW12P");
>> +		if (BayerFormat::fromPixelFormat(inputFormat).packing == BayerFormat::Packing::None) {
>> +			fragmentShaderData = bayer_unpacked_frag;
>> +			fragmentShaderDataLen = bayer_unpacked_frag_len;
>> +			vertexShaderData = bayer_unpacked_vert;
>> +			vertexShaderDataLen = bayer_unpacked_vert_len;
>> +			glFormat_ = GL_RG;
>> +			bytesPerPixel_ = 2;
>> +		} else {
>> +			fragmentShaderData = bayer_1x_packed_frag;
>> +			fragmentShaderDataLen = bayer_1x_packed_frag_len;
>> +			vertexShaderData = identity_vert;
>> +			vertexShaderDataLen = identity_vert_len;
>> +		}
>> +		break;
>> +	default:
>> +		goto invalid_fmt;
>> +		break;
>> +	};
>> +
>> +	if (ccmEnabled_) {
>> +		// Run the CCM if available
>> +		egl_.pushEnv(shaderEnv, "#define APPLY_CCM_PARAMETERS");
>> +	} else {
>> +		// Flag to shaders that we have parameter gain tables
>> +		egl_.pushEnv(shaderEnv, "#define APPLY_RGB_PARAMETERS");
>> +	}
>> +
>> +	if (egl_.compileVertexShader(vertexShaderId_, vertexShaderData, vertexShaderDataLen, shaderEnv))
>> +		goto compile_fail;
>> +
>> + if (egl_.compileFragmentShader(fragmentShaderId_, fragmentShaderData, fragmentShaderDataLen,
>> shaderEnv))
>> +		goto compile_fail;
>> +
>> +	if (egl_.linkProgram(programId_, vertexShaderId_, fragmentShaderId_))
>> +		goto link_fail;
>> +
>> +	egl_.dumpShaderSource(vertexShaderId_);
>> +	egl_.dumpShaderSource(fragmentShaderId_);
>> +
>> +	/* Ensure we set the programId_ */
>> +	egl_.useProgram(programId_);
>> +	err = glGetError();
>> +	if (err != GL_NO_ERROR)
>> +		goto program_fail;
>> +
>> +	if (getShaderVariableLocations())
>> +		goto parameters_fail;
>> +
>> +	return 0;
>> +
>> +parameters_fail:
>> +	LOG(Debayer, Error) << "Program parameters fail";
>> +	return -ENODEV;
>> +
>> +program_fail:
>> +	LOG(Debayer, Error) << "Use program error " << err;
>> +	return -ENODEV;
>> +
>> +link_fail:
>> +	LOG(Debayer, Error) << "Linking program fail";
>> +	return -ENODEV;
>> +
>> +compile_fail:
>> +	LOG(Debayer, Error) << "Compile debayer shaders fail";
>> +	return -ENODEV;
>> +
>> +invalid_fmt:
>> +	LOG(Debayer, Error) << "Unsupported input output format combination";
>> +	return -EINVAL;
>> +}
>> +
>> +int DebayerEGL::configure(const StreamConfiguration &inputCfg,
>> +			  const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
>> +			  bool ccmEnabled)
>> +{
>> +	GLint maxTextureImageUnits;
>> +
>> +	if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
>> +		return -EINVAL;
>> +
>> +	if (stats_->configure(inputCfg) != 0)
>> +		return -EINVAL;
>> +
>> +	const Size &stats_pattern_size = stats_->patternSize();
>> +	if (inputConfig_.patternSize.width != stats_pattern_size.width ||
>> +	    inputConfig_.patternSize.height != stats_pattern_size.height) {
>> +		LOG(Debayer, Error)
>> +			<< "mismatching stats and debayer pattern sizes for "
>> +			<< inputCfg.pixelFormat.toString();
>> +		return -EINVAL;
>> +	}
>> +
>> +	inputConfig_.stride = inputCfg.stride;
>> +	width_ = inputCfg.size.width;
>> +	height_ = inputCfg.size.height;
>> +	ccmEnabled_ = ccmEnabled;
>> +
>> +	if (outputCfgs.size() != 1) {
>> +		LOG(Debayer, Error)
>> +			<< "Unsupported number of output streams: "
>> +			<< outputCfgs.size();
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (gbmSurface_.createDevice())
>> +		return -ENODEV;
>> +
>> +	if (egl_.initEGLContext(&gbmSurface_))
>> +		return -ENODEV;
>> +
>> +	glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &maxTextureImageUnits);
>> +
>> +	LOG(Debayer, Debug) << "Available fragment shader texture units " << maxTextureImageUnits;
>> +
>> +	if (!ccmEnabled && maxTextureImageUnits < DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS) {
>> +		LOG(Debayer, Error) << "Fragment shader texture unit count " << maxTextureImageUnits
>> + << " required minimum for RGB gain table lookup " << DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS
>> +				    << " try using an identity CCM ";
>> +		return -ENODEV;
>> +	}
>> +
>> +	StreamConfiguration &outputCfg = outputCfgs[0];
>> +	SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
>> +	std::tie(outputConfig_.stride, outputConfig_.frameSize) =
>> +		strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
>> +
>> +	if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
>> +		LOG(Debayer, Error)
>> +			<< "Invalid output size/stride: "
>> +			<< "\n  " << outputCfg.size << " (" << outSizeRange << ")"
>> +			<< "\n  " << outputCfg.stride << " (" << outputConfig_.stride << ")";
>> +		return -EINVAL;
>> +	}
>> +
>> +	window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
>> +		    ~(inputConfig_.patternSize.width - 1);
>> +	window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
>> +		    ~(inputConfig_.patternSize.height - 1);
>> +	window_.width = outputCfg.size.width;
>> +	window_.height = outputCfg.size.height;
>> +
>> +	/*
>> +	 * Don't pass x,y from window_ since process() already adjusts for it.
>> +	 * But crop the window to 2/3 of its width and height for speedup.
>> +	 */
>> +	stats_->setWindow((window_.size() * 2 / 3).centeredTo(window_.center()));
>> +
>> +	// Raw bayer input as texture
>> +	eglImageBayerIn_ = new eGLImage(width_, height_, 32, GL_TEXTURE0, 0);
>> +	if (!eglImageBayerIn_)
>> +		return -ENOMEM;
>> +
>> +	// Only do the RGB lookup table textures if CCM is disabled
>> +	if (!ccmEnabled_) {
>> +
>> +		/// RGB correction tables as 2d textures
>> +		// eGL doesn't support glTexImage1D so we do a little hack with 2D to compensate
>> +		eglImageRedLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE1, 1);
>> +		if (!eglImageRedLookup_)
>> +			return -ENOMEM;
>> +
>> + eglImageGreenLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE2, 2);
>> +		if (!eglImageGreenLookup_)
>> +			return -ENOMEM;
>> +
>> + eglImageBlueLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE3, 3);
>> +		if (!eglImageBlueLookup_)
>> +			return -ENOMEM;
>> +	}
>> +
>> + eglImageBayerOut_ = new eGLImage(outputCfg.size.width, outputCfg.size.height, 32, outputCfg.stride,
>> GL_TEXTURE4, 4);
>> +	if (!eglImageBayerOut_)
>> +		return -ENOMEM;
>> +
>> +	if (initBayerShaders(inputCfg.pixelFormat, outputCfg.pixelFormat))
>> +		return -EINVAL;
>> +
>> +	return 0;
>> +}
>> +
>> +Size DebayerEGL::patternSize(PixelFormat inputFormat)
>> +{
>> +	DebayerEGL::DebayerInputConfig config;
>> +
>> +	if (getInputConfig(inputFormat, config) != 0)
>> +		return {};
>> +
>> +	return config.patternSize;
>> +}
>> +
>> +std::vector<PixelFormat> DebayerEGL::formats(PixelFormat inputFormat)
>> +{
>> +	DebayerEGL::DebayerInputConfig config;
>> +
>> +	if (getInputConfig(inputFormat, config) != 0)
>> +		return std::vector<PixelFormat>();
>> +
>> +	return config.outputFormats;
>> +}
>> +
>> +std::tuple<unsigned int, unsigned int>
>> +DebayerEGL::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
>> +{
>> +	DebayerEGL::DebayerOutputConfig config;
>> +
>> +	if (getOutputConfig(outputFormat, config) != 0)
>> +		return std::make_tuple(0, 0);
>> +
>> +	/* Align stride to 256 bytes as a generic GPU memory access alignment */
>> +	unsigned int stride = ALIGN_TO(size.width * config.bpp / 8, 256);
>> +
>> +	return std::make_tuple(stride, stride * size.height);
>> +}
>> +
>> +void DebayerEGL::setShaderVariableValues(void)
>> +{
>> +	/*
>> +	 * Raw Bayer 8-bit, and packed raw Bayer 10-bit/12-bit formats
>> +	 * are stored in a GL_LUMINANCE texture. The texture width is
>> +	 * equal to the stride.
>> +	 */
>> +	GLfloat firstRed[] = { firstRed_x_, firstRed_y_ };
>> +	GLfloat imgSize[] = { (GLfloat)width_,
>> +			      (GLfloat)height_ };
>> +	GLfloat Step[] = { static_cast<float>(bytesPerPixel_) / (inputConfig_.stride - 1),
>> +			   1.0f / (height_ - 1) };
>> +	GLfloat Stride = 1.0f;
>> +	GLfloat projIdentityMatrix[] = {
>> +		1, 0, 0, 0,
>> +		0, 1, 0, 0,
>> +		0, 0, 1, 0,
>> +		0, 0, 0, 1
>> +	};
>> +
>> +	// vertexIn - bayer_8.vert
>> +	glEnableVertexAttribArray(attributeVertex_);
>> +	glVertexAttribPointer(attributeVertex_, 2, GL_FLOAT, GL_TRUE,
>> +			      2 * sizeof(GLfloat), vcoordinates);
>> +
>> +	// textureIn - bayer_8.vert
>> +	glEnableVertexAttribArray(attributeTexture_);
>> +	glVertexAttribPointer(attributeTexture_, 2, GL_FLOAT, GL_TRUE,
>> +			      2 * sizeof(GLfloat), tcoordinates);
>> +
>> +	// Set the sampler2D to the respective texture unit for each texutre
>> +	// To simultaneously sample multiple textures we need to use multiple
>> +	// texture units
>> +	glUniform1i(textureUniformBayerDataIn_, eglImageBayerIn_->texture_unit_uniform_id_);
>> +	if (!ccmEnabled_) {
>> + glUniform1i(textureUniformRedLookupDataIn_, eglImageRedLookup_->texture_unit_uniform_id_);
>> + glUniform1i(textureUniformGreenLookupDataIn_, eglImageGreenLookup_->texture_unit_uniform_id_);
>> + glUniform1i(textureUniformBlueLookupDataIn_, eglImageBlueLookup_->texture_unit_uniform_id_);
>> +	}
>> +
>> +	// These values are:
>> +	// firstRed = tex_bayer_first_red - bayer_8.vert
>> +	// imgSize = tex_size - bayer_8.vert
>> +	// step = tex_step - bayer_8.vert
>> +	// Stride = stride_factor identity.vert
>> +	// textureUniformProjMatri = No scaling
>> +	glUniform2fv(textureUniformBayerFirstRed_, 1, firstRed);
>> +	glUniform2fv(textureUniformSize_, 1, imgSize);
>> +	glUniform2fv(textureUniformStep_, 1, Step);
>> +	glUniform1f(textureUniformStrideFactor_, Stride);
>> +	glUniformMatrix4fv(textureUniformProjMatrix_, 1,
>> +			   GL_FALSE, projIdentityMatrix);
>> +
>> +	LOG(Debayer, Debug) << "vertexIn " << attributeVertex_ << " textureIn " << attributeTexture_
>> +			    << " tex_y " << textureUniformBayerDataIn_
>> +			    << " red_param " << textureUniformRedLookupDataIn_
>> +			    << " green_param " << textureUniformGreenLookupDataIn_
>> +			    << " blue_param " << textureUniformBlueLookupDataIn_
>> +			    << " tex_step " << textureUniformStep_
>> +			    << " tex_size " << textureUniformSize_
>> +			    << " stride_factor " << textureUniformStrideFactor_
>> +			    << " tex_bayer_first_red " << textureUniformBayerFirstRed_;
>> +
>> +	LOG (Debayer, Debug) << "textureUniformY_ = 0 "
>> +			     <<	" firstRed.x " << firstRed[0]
>> +			     <<	" firstRed.y " << firstRed[1]
>> +			     <<	" textureUniformSize_.width " << imgSize[0]
>> +			     <<	" textureUniformSize_.height " << imgSize[1]
>> +			     <<	" textureUniformStep_.x " << Step[0]
>> +			     <<	" textureUniformStep_.y " << Step[1]
>> +			     <<	" textureUniformStrideFactor_ " << Stride
>> +			     <<	" textureUniformProjMatrix_ " << textureUniformProjMatrix_;
>> +	return;
>> +}
>> +
>> +void DebayerEGL::debayerGPU(MappedFrameBuffer &in, MappedFrameBuffer &out, DebayerParams &params)
>> +{
>> +	/* eGL context switch */
>> +	egl_.makeCurrent();
>> +
>> +	/* Greate a standard texture input */
>> + egl_.createTexture2D(eglImageBayerIn_, glFormat_, inputConfig_.stride / bytesPerPixel_, height_,
>> in.planes()[0].data());
>
> Have you already tried if you can use eglCreateImageKHR() here as well? 
> This texture creation is a bit unfortunate because it forces us to:
>
>  1. sync the input/v4l2 dmabuf to CPU
>  2. map it
>  3. copy/"upload" the buffer into a newly allocated texture
>
> while instead directly importing the buffer, like for the output buffer 
> below, wouldn't need any of that.
>
> As dmabuf import is not guaranteed to succeed we'd still need 
> createTexture2D as a fallback; however, AFAICS createInputDMABufTexture2D() 
> could be altered to use the same parameters - plus the fd, minus the 
> map and converting the GL format to the matching DRM one - and create an 
> equal/similar input texture without a copy.
>
>> +
>> +	/* Generate the output render framebuffer as render to texture */
>> +	egl_.createOutputDMABufTexture2D(eglImageBayerOut_, out.getPlaneFD(0));
>> +
>> +	/* Select the method we will use for bayer params CCM or params table */
>> +	if (ccmEnabled_) {
>> +		GLfloat ccm[9] = {
>> +			params.ccm[0][0], params.ccm[0][1], params.ccm[0][2],
>> +			params.ccm[1][0], params.ccm[1][1], params.ccm[1][2],
>> +			params.ccm[2][0], params.ccm[2][1], params.ccm[2][2],
>> +		};
>> +		glUniformMatrix3fv(ccmUniformDataIn_, 1, GL_FALSE, ccm);
>> +	} else {
>> + egl_.createTexture2D(eglImageRedLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.red);
>> + egl_.createTexture2D(eglImageGreenLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1,
>> &params.green);
>> + egl_.createTexture2D(eglImageBlueLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1,
>> &params.blue);
>> +	}
>> +
>> +	setShaderVariableValues();
>> +	glViewport(0, 0, width_, height_);
>> +	glClear(GL_COLOR_BUFFER_BIT);
>> +	glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS);
>> +
>> +	GLenum err = glGetError();
>> +	if (err != GL_NO_ERROR) {
>> +		LOG(eGL, Error) << "Drawing scene fail " << err;
>> +	} else {
>> +		egl_.syncOutput();
>> +	}
>> +
>> +	/* Teardown the output texture */
>> +	egl_.destroyDMABufTexture(eglImageBayerOut_);
> Probably a bit early, however: it would be nice to reuse the textures at 
> some point. That's what Wayland compositors do with dmabufs from clients 
> because image creation has quite a bit of overhead and can be avoided if 
> just the buffer content changes (would still need one texture per buffer 
> - i.e. 7/8? in total for the input and output pool).
>> +}
>> +
>> +void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>> +{
>> +	bench_.startFrame();
>> +
>> +	std::vector<DmaSyncer> dmaSyncers;
>> +
>> +	dmaSyncBegin(dmaSyncers, input, output);
>
> This is wrong now, at least with regard to the output buffer. Note that
> the sync is only needed for CPU access - so in the future, when
> debayerGPU() will hopefully only use the GPU while the CPU is limited to
> stats_->processFrame(), we can do something like this:
>
> diff --git a/src/libcamera/software_isp/debayer.cpp b/src/libcamera/software_isp/debayer.cpp
> index c16ce44b1..d277d3b6a 100644
> --- a/src/libcamera/software_isp/debayer.cpp
> +++ b/src/libcamera/software_isp/debayer.cpp
> @@ -223,8 +223,10 @@ void Debayer::dmaSyncBegin(std::vector<DmaSyncer> &dmaSyncers, FrameBuffer *inpu
>          for (const FrameBuffer::Plane &plane : input->planes())
>                  dmaSyncers.emplace_back(plane.fd, DmaSyncer::SyncType::Read);
>
> -       for (const FrameBuffer::Plane &plane : output->planes())
> -               dmaSyncers.emplace_back(plane.fd, DmaSyncer::SyncType::Write);
> +       if (output) {
> +               for (const FrameBuffer::Plane &plane : output->planes())
> +                       dmaSyncers.emplace_back(plane.fd, DmaSyncer::SyncType::Write);
> +       }
>  }
>
>  /**
> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
> index 3932044a2..ded2d27f7 100644
> --- a/src/libcamera/software_isp/debayer_egl.cpp
> +++ b/src/libcamera/software_isp/debayer_egl.cpp
> @@ -558,8 +558,6 @@ void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
>
>          std::vector<DmaSyncer> dmaSyncers;
>
> -       dmaSyncBegin(dmaSyncers, input, output);
> -
>          setParams(params);
>
>          /* Copy metadata from the input buffer */
> @@ -578,14 +576,14 @@ void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
>
>          debayerGPU(in, out, params);
>
> -       dmaSyncers.clear();
> -
>          bench_.finishFrame();
>
>          metadata.planes()[0].bytesused = out.planes()[0].size();
>
>          /* Calculate stats for the whole frame */
> +       dmaSyncBegin(dmaSyncers, input, nullptr);
>          stats_->processFrame(frame, 0, input);
> +       dmaSyncers.clear();
>
>          outputBufferReady.emit(output);
>          inputBufferReady.emit(input);

This is mangled (also at https://patchwork.libcamera.org/patch/24208/,
so not a problem of my client).

> However as long as the input buffer doesn't get directly imported we 
> should at least limit the sync to the input buffer, i.e. do something like:
>
> diff --git a/src/libcamera/software_isp/debayer_egl.cpp 
> b/src/libcamera/software_isp/debayer_egl.cpp
> index 3932044a2..9f251720e 100644
> --- a/src/libcamera/software_isp/debayer_egl.cpp
> +++ b/src/libcamera/software_isp/debayer_egl.cpp
> @@ -558,7 +558,7 @@ void DebayerEGL::process(uint32_t frame, FrameBuffer 
> *input, FrameBuffer *output
>
>          std::vector<DmaSyncer> dmaSyncers;
>
> -       dmaSyncBegin(dmaSyncers, input, output);
> +       dmaSyncBegin(dmaSyncers, input, nullptr);

In case somebody else tries this: it requires adding a nullptr check for
the output buffer to dmaSyncBegin() to prevent a segfault.

>          setParams(params);
>
> @@ -578,14 +578,13 @@ void DebayerEGL::process(uint32_t frame, 
> FrameBuffer *input, FrameBuffer *output
>
>          debayerGPU(in, out, params);
>
> -       dmaSyncers.clear();
> -
>          bench_.finishFrame();
>
>          metadata.planes()[0].bytesused = out.planes()[0].size();
>
>          /* Calculate stats for the whole frame */
>          stats_->processFrame(frame, 0, input);
> +       dmaSyncers.clear();
>
>          outputBufferReady.emit(output);
>          inputBufferReady.emit(input);

I tried the change and it seems to fix the artefacts I experienced.

> I'm not quite sure what the write-sync on the output buffer currently 
> does on affected platforms (mainly aarch64), however IIUC it 
> could/should cause a race between the GPU writing to the buffer and the 
> CPU flushing the corresponding caches, potentially overwriting the new 
> data from the GPU with old data from the CPU, explaining glitches people 
> have been reporting.
>
>> +
>> +	setParams(params);
>> +
>> +	/* Copy metadata from the input buffer */
>> +	FrameMetadata &metadata = output->_d()->metadata();
>> +	metadata.status = input->metadata().status;
>> +	metadata.sequence = input->metadata().sequence;
>> +	metadata.timestamp = input->metadata().timestamp;
>> +
>> +	MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
>> +	MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
>
> I wonder whether we can improve how we mmap the buffers here. For the
> output buffer we should already be able to just use the FrameBuffer,
> while the input buffer will get mapped in stats_->processFrame()
> eventually. The kernel's page cache should avoid most of the overhead;
> however, calling dma_buf_mmap() is AFAIK non-negligible, and once we
> actually read, we always trigger page faults on the first read after
> mapping, if I'm not mistaken.
>
> Judging from GStreamer and how GST_FD_MEMORY_FLAG_KEEP_MAPPED is used in
> various cases it'll probably be best to never map the output buffers 
> while always keeping the input buffers read-mapped. In case of dmabufs 
> that shouldn't cause additional sync work or copies (as that's handled 
> by DmaSyncer/DMA_BUF_IOCTL_SYNC), while reducing overhead - mainly 
> syscalls - quite a bit. Maybe we can add some wrapper class for 
> FrameBuffer, similar to MappedFrameBuffer, holding the cached EGL images 
> and mappings as required.
>
> That's all for now. Best regards,
>
> Robert
>
>> +	if (!in.isValid() || !out.isValid()) {
>> +		LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
>> +		metadata.status = FrameMetadata::FrameError;
>> +		return;
>> +	}
>> +
>> +	debayerGPU(in, out, params);
>> +
>> +	dmaSyncers.clear();
>> +
>> +	bench_.finishFrame();
>> +
>> +	metadata.planes()[0].bytesused = out.planes()[0].size();
>> +
>> +	/* Calculate stats for the whole frame */
>> +	stats_->processFrame(frame, 0, input);
>> +
>> +	outputBufferReady.emit(output);
>> +	inputBufferReady.emit(input);
>> +}
>> +
>> +void DebayerEGL::stop()
>> +{
>> +	egl_.cleanUp();
>> +}
>> +
>> +SizeRange DebayerEGL::sizes(PixelFormat inputFormat, const Size &inputSize)
>> +{
>> +	Size patternSize = this->patternSize(inputFormat);
>> +	unsigned int borderHeight = patternSize.height;
>> +
>> +	if (patternSize.isNull())
>> +		return {};
>> +
>> +	/* No need for top/bottom border with a pattern height of 2 */
>> +	if (patternSize.height == 2)
>> +		borderHeight = 0;
>> +
>> +	/*
>> +	 * For debayer interpolation a border is kept around the entire image
>> +	 * and the minimum output size is pattern-height x pattern-width.
>> +	 */
>> +	if (inputSize.width < (3 * patternSize.width) ||
>> +	    inputSize.height < (2 * borderHeight + patternSize.height)) {
>> +		LOG(Debayer, Warning)
>> +			<< "Input format size too small: " << inputSize.toString();
>> +		return {};
>> +	}
>> +
>> +	return SizeRange(Size(patternSize.width, patternSize.height),
>> +			 Size((inputSize.width - 2 * patternSize.width) & ~(patternSize.width - 1),
>> +			      (inputSize.height - 2 * borderHeight) & ~(patternSize.height - 1)),
>> +			 patternSize.width, patternSize.height);
>> +}
>> +
>> +} /* namespace libcamera */
>> diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..ecb22fcb7f3a7d74c3a605a5351ea5871df24f5d
>> --- /dev/null
>> +++ b/src/libcamera/software_isp/debayer_egl.h
>> @@ -0,0 +1,171 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2025, Bryan O'Donoghue.
>> + *
>> + * Authors:
>> + * Bryan O'Donoghue<bryan.odonoghue@linaro.org>
>> + *
>> + * debayer_egl.h - EGL debayer header
>> + */
>> +
>> +#pragma once
>> +
>> +#include <memory>
>> +#include <stdint.h>
>> +#include <vector>
>> +
>> +#define GL_GLEXT_PROTOTYPES
>> +#define EGL_EGLEXT_PROTOTYPES
>> +#include <EGL/egl.h>
>> +#include <EGL/eglext.h>
>> +#include <GLES3/gl32.h>
>> +
>> +#include <libcamera/base/object.h>
>> +
>> +#include "debayer.h"
>> +
>> +#include "libcamera/internal/bayer_format.h"
>> +#include "libcamera/internal/egl.h"
>> +#include "libcamera/internal/framebuffer.h"
>> +#include "libcamera/internal/mapped_framebuffer.h"
>> +#include "libcamera/internal/software_isp/benchmark.h"
>> +#include "libcamera/internal/software_isp/swstats_cpu.h"
>> +
>> +namespace libcamera {
>> +
>> +#define DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS 4
>> +#define DEBAYER_OPENGL_COORDS 4
>> +
>> +/**
>> + * \class DebayerEGL
>> + * \brief Class for debayering using an EGL Shader
>> + *
>> + * Implements an EGL shader based debayering solution.
>> + */
>> +class DebayerEGL : public Debayer
>> +{
>> +public:
>> +	/**
>> +	 * \brief Constructs a DebayerEGL object.
>> +	 * \param[in] stats Pointer to the stats object to use.
>> +	 */
>> +	DebayerEGL(std::unique_ptr<SwStatsCpu> stats);
>> +	~DebayerEGL();
>> +
>> +	/*
>> +	 * Setup the Debayer object according to the passed in parameters.
>> +	 * Return 0 on success, a negative errno value on failure
>> +	 * (unsupported parameters).
>> +	 */
>> +	int configure(const StreamConfiguration &inputCfg,
>> +		      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
>> +		      bool ccmEnabled);
>> +
>> +	/*
>> +	 * Get width and height at which the bayer-pattern repeats.
>> +	 * Return pattern-size or an empty Size for an unsupported inputFormat.
>> +	 */
>> +	Size patternSize(PixelFormat inputFormat);
>> +
>> +	std::vector<PixelFormat> formats(PixelFormat input);
>> +	std::tuple<unsigned int, unsigned int> strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
>> +
>> +	void process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params);
>> +	void stop();
>> +
>> +	/**
>> +	 * \brief Get the file descriptor for the statistics.
>> +	 *
>> +	 * \return the file descriptor pointing to the statistics.
>> +	 */
>> +	const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
>> +
>> +	/**
>> +	 * \brief Get the output frame size.
>> +	 *
>> +	 * \return The output frame size.
>> +	 */
>> +	unsigned int frameSize() { return outputConfig_.frameSize; }
>> +
>> +	SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
>> +
>> +private:
>> +	static int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
>> +	static int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
>> +	int setupStandardBayerOrder(BayerFormat::Order order);
>> +	void pushEnv(std::vector<std::string> &shaderEnv, const char *str);
>> +	int initBayerShaders(PixelFormat inputFormat, PixelFormat outputFormat);
>> +	int initEGLContext();
>> +	int generateTextures();
>> +	int compileShaderProgram(GLuint &shaderId, GLenum shaderType,
>> +				 unsigned char *shaderData, int shaderDataLen,
>> +				 std::vector<std::string> shaderEnv);
>> +	int linkShaderProgram(void);
>> +	int getShaderVariableLocations();
>> +	void setShaderVariableValues(void);
>> +	void configureTexture(GLuint &texture);
>> +	void debayerGPU(MappedFrameBuffer &in, MappedFrameBuffer &out, DebayerParams &params);
>> +
>> +	// Shader program identifiers
>> +	GLuint vertexShaderId_;
>> +	GLuint fragmentShaderId_;
>> +	GLuint programId_;
>> +	enum {
>> +		BAYER_INPUT_INDEX = 0,
>> +		BAYER_OUTPUT_INDEX,
>> +		BAYER_BUF_NUM,
>> +	};
>> +
>> +	// Pointer to object representing input texture
>> +	eGLImage *eglImageBayerIn_;
>> +	eGLImage *eglImageBayerOut_;
>> +
>> +	eGLImage *eglImageRedLookup_;
>> +	eGLImage *eglImageGreenLookup_;
>> +	eGLImage *eglImageBlueLookup_;
>> +
>> +	// Shader parameters
>> +	float firstRed_x_;
>> +	float firstRed_y_;
>> +	GLint attributeVertex_;
>> +	GLint attributeTexture_;
>> +	GLint textureUniformStep_;
>> +	GLint textureUniformSize_;
>> +	GLint textureUniformStrideFactor_;
>> +	GLint textureUniformBayerFirstRed_;
>> +	GLint textureUniformProjMatrix_;
>> +
>> +	GLint textureUniformBayerDataIn_;
>> +
>> +	// These textures will either point to simple RGB gains or to CCM lookup tables
>> +	GLint textureUniformRedLookupDataIn_;
>> +	GLint textureUniformGreenLookupDataIn_;
>> +	GLint textureUniformBlueLookupDataIn_;
>> +
>> +	// Represent per-frame CCM as a uniform vector of floats 3 x 3
>> +	GLint ccmUniformDataIn_;
>> +	bool ccmEnabled_;
>> +
>> +	Rectangle window_;
>> +	std::unique_ptr<SwStatsCpu> stats_;
>> +	eGL egl_;
>> +	GBM gbmSurface_;
>> +	uint32_t width_;
>> +	uint32_t height_;
>> +	GLint glFormat_;
>> +	unsigned int bytesPerPixel_;
>> +	GLfloat vcoordinates[DEBAYER_OPENGL_COORDS][2] = {
>> +		{ -1.0f, -1.0f },
>> +		{ -1.0f, +1.0f },
>> +		{ +1.0f, +1.0f },
>> +		{ +1.0f, -1.0f },
>> +	};
>> +	GLfloat tcoordinates[DEBAYER_OPENGL_COORDS][2] = {
>> +		{ 0.0f, 0.0f },
>> +		{ 0.0f, 1.0f },
>> +		{ 1.0f, 1.0f },
>> +		{ 1.0f, 0.0f },
>> +	};
>> +};
>> +
>> +} /* namespace libcamera */
>> diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
>> index 59fa5f02a0a5620fa524d8a171332f04e0f769b2..c61ac7d59d37c5ef49ac67fe74cbcda3d89c30cb 100644
>> --- a/src/libcamera/software_isp/meson.build
>> +++ b/src/libcamera/software_isp/meson.build
>> @@ -2,6 +2,7 @@
>>   
>>   softisp_enabled = pipelines.contains('simple')
>>   summary({'SoftISP support' : softisp_enabled}, section : 'Configuration')
>> +summary({'SoftISP GPU acceleration' : gles_headless_enabled}, section : 'Configuration')
>>   
>>   if not softisp_enabled
>>       subdir_done()
>> @@ -14,3 +15,10 @@ libcamera_internal_sources += files([
>>       'software_isp.cpp',
>>       'swstats_cpu.cpp',
>>   ])
>> +
>> +if softisp_enabled and gles_headless_enabled
>> +    config_h.set('HAVE_DEBAYER_EGL', 1)
>> +    libcamera_internal_sources += files([
>> +        'debayer_egl.cpp',
>> +    ])
>> +endif
>>
Bryan O'Donoghue Aug. 29, 2025, 9:21 p.m. UTC | #3
On 29/08/2025 11:34, Robert Mader wrote:
>> +void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>> +{
>> +	bench_.startFrame();
>> +
>> +	std::vector<DmaSyncer> dmaSyncers;
>> +
>> +	dmaSyncBegin(dmaSyncers, input, output);
> 
> This is wrong now, at least with regard to the output buffer. Note that
> the sync is only needed for CPU access - so in the future, when
> debayerGPU() will hopefully only use the GPU while the CPU is limited to
> stats_->processFrame(), we can do something like this:
> 

This is something we've discussed in the call.

We feed a dma-buf handle into eglCreateImageKHR() and then Mesa/GPU
are presumably responsible for flushing the cache on the output
framebuffer that the dma-buf handle points to.

To me, the "ownership" of the cache flush lies with EGL when we sync after
glDraw(), but I haven't verified that this is so yet.

i.e. surely it's up to the EGL logic to understand that a render buffer
created with eglCreateImageKHR() requires a cache flush when we do eglSync().

I haven't checked the mesa source for this yet.

---
bod

Patch
diff mbox series

diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3932044a231ad8348f011369396556c5ad230ff6
--- /dev/null
+++ b/src/libcamera/software_isp/debayer_egl.cpp
@@ -0,0 +1,628 @@ 
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2024, Linaro Ltd.
+ *
+ * Authors:
+ * Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+ *
+ * debayer_egl.cpp - EGL based debayering class
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <libcamera/formats.h>
+
+#include "libcamera/internal/glsl_shaders.h"
+#include "debayer_egl.h"
+
+namespace libcamera {
+
+DebayerEGL::DebayerEGL(std::unique_ptr<SwStatsCpu> stats)
+	: Debayer(), stats_(std::move(stats))
+{
+	eglImageBayerIn_ = eglImageBayerOut_= eglImageRedLookup_ = eglImageBlueLookup_ = eglImageGreenLookup_ = NULL;
+}
+
+DebayerEGL::~DebayerEGL()
+{
+	if (eglImageBlueLookup_)
+		delete eglImageBlueLookup_;
+
+	if (eglImageGreenLookup_)
+		delete eglImageGreenLookup_;
+
+	if (eglImageRedLookup_)
+		delete eglImageRedLookup_;
+
+	if (eglImageBayerOut_)
+		delete eglImageBayerOut_;
+
+	if (eglImageBayerIn_)
+		delete eglImageBayerIn_;
+}
+
+int DebayerEGL::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
+{
+	BayerFormat bayerFormat =
+		BayerFormat::fromPixelFormat(inputFormat);
+
+	if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10) &&
+	    bayerFormat.packing == BayerFormat::Packing::None &&
+	    isStandardBayerOrder(bayerFormat.order)) {
+		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
+		config.patternSize.width = 2;
+		config.patternSize.height = 2;
+		config.outputFormats = std::vector<PixelFormat>({ formats::XRGB8888,
+								  formats::ARGB8888,
+								  formats::XBGR8888,
+								  formats::ABGR8888 });
+		return 0;
+	}
+
+	if (bayerFormat.bitDepth == 10 &&
+	    bayerFormat.packing == BayerFormat::Packing::CSI2 &&
+	    isStandardBayerOrder(bayerFormat.order)) {
+		config.bpp = 10;
+		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
+		config.patternSize.height = 2;
+		config.outputFormats = std::vector<PixelFormat>({ formats::XRGB8888,
+								  formats::ARGB8888,
+								  formats::XBGR8888,
+								  formats::ABGR8888 });
+		return 0;
+	}
+
+	LOG(Debayer, Error)
+		<< "Unsupported input format " << inputFormat.toString();
+
+	return -EINVAL;
+}
+
+int DebayerEGL::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
+{
+	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
+	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
+		config.bpp = 32;
+		return 0;
+	}
+
+	LOG(Debayer, Error)
+		<< "Unsupported output format " << outputFormat.toString();
+
+	return -EINVAL;
+}
+
+int DebayerEGL::getShaderVariableLocations(void)
+{
+	attributeVertex_ = glGetAttribLocation(programId_, "vertexIn");
+	attributeTexture_ = glGetAttribLocation(programId_, "textureIn");
+
+	textureUniformBayerDataIn_ = glGetUniformLocation(programId_, "tex_y");
+	textureUniformRedLookupDataIn_ = glGetUniformLocation(programId_, "red_param");
+	textureUniformGreenLookupDataIn_ = glGetUniformLocation(programId_, "green_param");
+	textureUniformBlueLookupDataIn_ = glGetUniformLocation(programId_, "blue_param");
+	ccmUniformDataIn_ = glGetUniformLocation(programId_, "ccm");
+
+	textureUniformStep_ = glGetUniformLocation(programId_, "tex_step");
+	textureUniformSize_ = glGetUniformLocation(programId_, "tex_size");
+	textureUniformStrideFactor_ = glGetUniformLocation(programId_, "stride_factor");
+	textureUniformBayerFirstRed_ = glGetUniformLocation(programId_, "tex_bayer_first_red");
+	textureUniformProjMatrix_ = glGetUniformLocation(programId_, "proj_matrix");
+
+	LOG(Debayer, Debug) << "vertexIn " << attributeVertex_ << " textureIn " << attributeTexture_
+			    << " tex_y " << textureUniformBayerDataIn_
+			    << " red_param " << textureUniformRedLookupDataIn_
+			    << " green_param " << textureUniformGreenLookupDataIn_
+			    << " blue_param " << textureUniformBlueLookupDataIn_
+			    << " ccm " << ccmUniformDataIn_
+			    << " tex_step " << textureUniformStep_
+			    << " tex_size " << textureUniformSize_
+			    << " stride_factor " << textureUniformStrideFactor_
+			    << " tex_bayer_first_red " << textureUniformBayerFirstRed_
+			    << " proj_matrix " << textureUniformProjMatrix_;
+	return 0;
+}
+
+int DebayerEGL::initBayerShaders(PixelFormat inputFormat, PixelFormat outputFormat)
+{
+	std::vector<std::string> shaderEnv;
+	unsigned int fragmentShaderDataLen;
+	unsigned char *fragmentShaderData;
+	unsigned int vertexShaderDataLen;
+	unsigned char *vertexShaderData;
+	GLenum err;
+
+	// Target gles 100 glsl requires "#version x" as first directive in shader
+	egl_.pushEnv(shaderEnv, "#version 100");
+
+	// Specify GL_OES_EGL_image_external
+	egl_.pushEnv(shaderEnv, "#extension GL_OES_EGL_image_external: enable");
+
+	// Tell shaders how to re-order output taking account of how the
+	// pixels are actually stored by GBM
+	switch (outputFormat) {
+	case formats::ARGB8888:
+	case formats::XRGB8888:
+		break;
+	case formats::ABGR8888:
+	case formats::XBGR8888:
+		egl_.pushEnv(shaderEnv, "#define SWAP_BLUE");
+		break;
+	default:
+		goto invalid_fmt;
+	}
+
+	// Pixel location parameters
+	glFormat_ = GL_LUMINANCE;
+	bytesPerPixel_ = 1;
+	switch (inputFormat) {
+	case libcamera::formats::SBGGR8:
+	case libcamera::formats::SBGGR10_CSI2P:
+	case libcamera::formats::SBGGR12_CSI2P:
+		firstRed_x_ = 1.0;
+		firstRed_y_ = 1.0;
+		break;
+	case libcamera::formats::SGBRG8:
+	case libcamera::formats::SGBRG10_CSI2P:
+	case libcamera::formats::SGBRG12_CSI2P:
+		firstRed_x_ = 0.0;
+		firstRed_y_ = 1.0;
+		break;
+	case libcamera::formats::SGRBG8:
+	case libcamera::formats::SGRBG10_CSI2P:
+	case libcamera::formats::SGRBG12_CSI2P:
+		firstRed_x_ = 1.0;
+		firstRed_y_ = 0.0;
+		break;
+	case libcamera::formats::SRGGB8:
+	case libcamera::formats::SRGGB10_CSI2P:
+	case libcamera::formats::SRGGB12_CSI2P:
+		firstRed_x_ = 0.0;
+		firstRed_y_ = 0.0;
+		break;
+	default:
+		goto invalid_fmt;
+		break;
+	};
+
+	// Shader selection
+	switch (inputFormat) {
+	case libcamera::formats::SBGGR8:
+	case libcamera::formats::SGBRG8:
+	case libcamera::formats::SGRBG8:
+	case libcamera::formats::SRGGB8:
+		fragmentShaderData = bayer_unpacked_frag;
+		fragmentShaderDataLen = bayer_unpacked_frag_len;
+		vertexShaderData = bayer_unpacked_vert;
+		vertexShaderDataLen = bayer_unpacked_vert_len;
+		break;
+	case libcamera::formats::SBGGR10_CSI2P:
+	case libcamera::formats::SGBRG10_CSI2P:
+	case libcamera::formats::SGRBG10_CSI2P:
+	case libcamera::formats::SRGGB10_CSI2P:
+		egl_.pushEnv(shaderEnv, "#define RAW10P");
+		if (BayerFormat::fromPixelFormat(inputFormat).packing == BayerFormat::Packing::None) {
+			fragmentShaderData = bayer_unpacked_frag;
+			fragmentShaderDataLen = bayer_unpacked_frag_len;
+			vertexShaderData = bayer_unpacked_vert;
+			vertexShaderDataLen = bayer_unpacked_vert_len;
+			glFormat_ = GL_RG;
+			bytesPerPixel_ = 2;
+		} else {
+			fragmentShaderData = bayer_1x_packed_frag;
+			fragmentShaderDataLen = bayer_1x_packed_frag_len;
+			vertexShaderData = identity_vert;
+			vertexShaderDataLen = identity_vert_len;
+		}
+		break;
+	case libcamera::formats::SBGGR12_CSI2P:
+	case libcamera::formats::SGBRG12_CSI2P:
+	case libcamera::formats::SGRBG12_CSI2P:
+	case libcamera::formats::SRGGB12_CSI2P:
+		egl_.pushEnv(shaderEnv, "#define RAW12P");
+		if (BayerFormat::fromPixelFormat(inputFormat).packing == BayerFormat::Packing::None) {
+			fragmentShaderData = bayer_unpacked_frag;
+			fragmentShaderDataLen = bayer_unpacked_frag_len;
+			vertexShaderData = bayer_unpacked_vert;
+			vertexShaderDataLen = bayer_unpacked_vert_len;
+			glFormat_ = GL_RG;
+			bytesPerPixel_ = 2;
+		} else {
+			fragmentShaderData = bayer_1x_packed_frag;
+			fragmentShaderDataLen = bayer_1x_packed_frag_len;
+			vertexShaderData = identity_vert;
+			vertexShaderDataLen = identity_vert_len;
+		}
+		break;
+	default:
+		goto invalid_fmt;
+		break;
+	};
+
+	if (ccmEnabled_) {
+		// Run the CCM if available
+		egl_.pushEnv(shaderEnv, "#define APPLY_CCM_PARAMETERS");
+	} else {
+		// Flag to shaders that we have parameter gain tables
+		egl_.pushEnv(shaderEnv, "#define APPLY_RGB_PARAMETERS");
+	}
+
+	if (egl_.compileVertexShader(vertexShaderId_, vertexShaderData, vertexShaderDataLen, shaderEnv))
+		goto compile_fail;
+
+	if (egl_.compileFragmentShader(fragmentShaderId_, fragmentShaderData, fragmentShaderDataLen, shaderEnv))
+		goto compile_fail;
+
+	if (egl_.linkProgram(programId_, vertexShaderId_, fragmentShaderId_))
+		goto link_fail;
+
+	egl_.dumpShaderSource(vertexShaderId_);
+	egl_.dumpShaderSource(fragmentShaderId_);
+
+	/* Ensure we set the programId_ */
+	egl_.useProgram(programId_);
+	err = glGetError();
+	if (err != GL_NO_ERROR)
+		goto program_fail;
+
+	if (getShaderVariableLocations())
+		goto parameters_fail;
+
+	return 0;
+
+parameters_fail:
+	LOG(Debayer, Error) << "Program parameters fail";
+	return -ENODEV;
+
+program_fail:
+	LOG(Debayer, Error) << "Use program error " << err;
+	return -ENODEV;
+
+link_fail:
+	LOG(Debayer, Error) << "Linking program fail";
+	return -ENODEV;
+
+compile_fail:
+	LOG(Debayer, Error) << "Compile debayer shaders fail";
+	return -ENODEV;
+
+invalid_fmt:
+	LOG(Debayer, Error) << "Unsupported input output format combination";
+	return -EINVAL;
+}
+
+int DebayerEGL::configure(const StreamConfiguration &inputCfg,
+			  const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
+			  bool ccmEnabled)
+{
+	GLint maxTextureImageUnits;
+
+	if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
+		return -EINVAL;
+
+	if (stats_->configure(inputCfg) != 0)
+		return -EINVAL;
+
+	const Size &stats_pattern_size = stats_->patternSize();
+	if (inputConfig_.patternSize.width != stats_pattern_size.width ||
+	    inputConfig_.patternSize.height != stats_pattern_size.height) {
+		LOG(Debayer, Error)
+			<< "mismatching stats and debayer pattern sizes for "
+			<< inputCfg.pixelFormat.toString();
+		return -EINVAL;
+	}
+
+	inputConfig_.stride = inputCfg.stride;
+	width_ = inputCfg.size.width;
+	height_ = inputCfg.size.height;
+	ccmEnabled_ = ccmEnabled;
+
+	if (outputCfgs.size() != 1) {
+		LOG(Debayer, Error)
+			<< "Unsupported number of output streams: "
+			<< outputCfgs.size();
+		return -EINVAL;
+	}
+
+	if (gbmSurface_.createDevice())
+		return -ENODEV;
+
+	if (egl_.initEGLContext(&gbmSurface_))
+		return -ENODEV;
+
+	glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &maxTextureImageUnits);
+
+	LOG(Debayer, Debug) << "Available fragment shader texture units " << maxTextureImageUnits;
+
+	if (!ccmEnabled && maxTextureImageUnits < DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS) {
+		LOG(Debayer, Error) << "Fragment shader texture unit count " << maxTextureImageUnits
+				    << " required minimum for RGB gain table lookup " << DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS
+				    << " try using an identity CCM ";
+		return -ENODEV;
+	}
+
+	StreamConfiguration &outputCfg = outputCfgs[0];
+	SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
+	std::tie(outputConfig_.stride, outputConfig_.frameSize) =
+		strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
+
+	if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
+		LOG(Debayer, Error)
+			<< "Invalid output size/stride: "
+			<< "\n  " << outputCfg.size << " (" << outSizeRange << ")"
+			<< "\n  " << outputCfg.stride << " (" << outputConfig_.stride << ")";
+		return -EINVAL;
+	}
+
+	window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
+		    ~(inputConfig_.patternSize.width - 1);
+	window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
+		    ~(inputConfig_.patternSize.height - 1);
+	window_.width = outputCfg.size.width;
+	window_.height = outputCfg.size.height;
+
+	/*
+	 * Don't pass x,y from window_ since process() already adjusts for it.
+	 * But crop the window to 2/3 of its width and height for speedup.
+	 */
+	stats_->setWindow((window_.size() * 2 / 3).centeredTo(window_.center()));
+
+	// Raw bayer input as texture
+	eglImageBayerIn_ = new eGLImage(width_, height_, 32, GL_TEXTURE0, 0);
+	if (!eglImageBayerIn_)
+		return -ENOMEM;
+
+	// Only do the RGB lookup table textures if CCM is disabled
+	if (!ccmEnabled_) {
+
+		/// RGB correction tables as 2d textures
+		// eGL doesn't support glTexImage1D so we do a little hack with 2D to compensate
+		eglImageRedLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE1, 1);
+		if (!eglImageRedLookup_)
+			return -ENOMEM;
+
+		eglImageGreenLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE2, 2);
+		if (!eglImageGreenLookup_)
+			return -ENOMEM;
+
+		eglImageBlueLookup_ = new eGLImage(DebayerParams::kRGBLookupSize, 1, 32, GL_TEXTURE3, 3);
+		if (!eglImageBlueLookup_)
+			return -ENOMEM;
+	}
+
+	eglImageBayerOut_ = new eGLImage(outputCfg.size.width, outputCfg.size.height, 32, outputCfg.stride, GL_TEXTURE4, 4);
+	if (!eglImageBayerOut_)
+		return -ENOMEM;
+
+	if (initBayerShaders(inputCfg.pixelFormat, outputCfg.pixelFormat))
+		return -EINVAL;
+
+	return 0;
+}
+
+Size DebayerEGL::patternSize(PixelFormat inputFormat)
+{
+	DebayerEGL::DebayerInputConfig config;
+
+	if (getInputConfig(inputFormat, config) != 0)
+		return {};
+
+	return config.patternSize;
+}
+
+std::vector<PixelFormat> DebayerEGL::formats(PixelFormat inputFormat)
+{
+	DebayerEGL::DebayerInputConfig config;
+
+	if (getInputConfig(inputFormat, config) != 0)
+		return std::vector<PixelFormat>();
+
+	return config.outputFormats;
+}
+
+std::tuple<unsigned int, unsigned int>
+DebayerEGL::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
+{
+	DebayerEGL::DebayerOutputConfig config;
+
+	if (getOutputConfig(outputFormat, config) != 0)
+		return std::make_tuple(0, 0);
+
+	/* Align stride to 256 bytes as a generic GPU memory access alignment */
+	unsigned int stride = ALIGN_TO(size.width * config.bpp / 8, 256);
+
+	return std::make_tuple(stride, stride * size.height);
+}
+
+void DebayerEGL::setShaderVariableValues(void)
+{
+	/*
+	 * Raw Bayer 8-bit, and packed raw Bayer 10-bit/12-bit formats
+	 * are stored in a GL_LUMINANCE texture. The texture width is
+	 * equal to the stride.
+	 */
+	GLfloat firstRed[] = { firstRed_x_, firstRed_y_ };
+	GLfloat imgSize[] = { (GLfloat)width_,
+			      (GLfloat)height_ };
+	GLfloat Step[] = { static_cast<float>(bytesPerPixel_) / (inputConfig_.stride - 1),
+			   1.0f / (height_ - 1) };
+	GLfloat Stride = 1.0f;
+	GLfloat projIdentityMatrix[] = {
+		1, 0, 0, 0,
+		0, 1, 0, 0,
+		0, 0, 1, 0,
+		0, 0, 0, 1
+	};
+
+	// vertexIn - bayer_8.vert
+	glEnableVertexAttribArray(attributeVertex_);
+	glVertexAttribPointer(attributeVertex_, 2, GL_FLOAT, GL_TRUE,
+			      2 * sizeof(GLfloat), vcoordinates);
+
+	// textureIn - bayer_8.vert
+	glEnableVertexAttribArray(attributeTexture_);
+	glVertexAttribPointer(attributeTexture_, 2, GL_FLOAT, GL_TRUE,
+			      2 * sizeof(GLfloat), tcoordinates);
+
+	// Set the sampler2D to the respective texture unit for each texture
+	// To simultaneously sample multiple textures we need to use multiple
+	// texture units
+	glUniform1i(textureUniformBayerDataIn_, eglImageBayerIn_->texture_unit_uniform_id_);
+	if (!ccmEnabled_) {
+		glUniform1i(textureUniformRedLookupDataIn_, eglImageRedLookup_->texture_unit_uniform_id_);
+		glUniform1i(textureUniformGreenLookupDataIn_, eglImageGreenLookup_->texture_unit_uniform_id_);
+		glUniform1i(textureUniformBlueLookupDataIn_, eglImageBlueLookup_->texture_unit_uniform_id_);
+	}
+
+	// These values are:
+	// firstRed = tex_bayer_first_red - bayer_8.vert
+	// imgSize = tex_size - bayer_8.vert
+	// step = tex_step - bayer_8.vert
+	// Stride = stride_factor identity.vert
+	// textureUniformProjMatri = No scaling
+	glUniform2fv(textureUniformBayerFirstRed_, 1, firstRed);
+	glUniform2fv(textureUniformSize_, 1, imgSize);
+	glUniform2fv(textureUniformStep_, 1, Step);
+	glUniform1f(textureUniformStrideFactor_, Stride);
+	glUniformMatrix4fv(textureUniformProjMatrix_, 1,
+			   GL_FALSE, projIdentityMatrix);
+
+	LOG(Debayer, Debug) << "vertexIn " << attributeVertex_ << " textureIn " << attributeTexture_
+			    << " tex_y " << textureUniformBayerDataIn_
+			    << " red_param " << textureUniformRedLookupDataIn_
+			    << " green_param " << textureUniformGreenLookupDataIn_
+			    << " blue_param " << textureUniformBlueLookupDataIn_
+			    << " tex_step " << textureUniformStep_
+			    << " tex_size " << textureUniformSize_
+			    << " stride_factor " << textureUniformStrideFactor_
+			    << " tex_bayer_first_red " << textureUniformBayerFirstRed_;
+
+	LOG (Debayer, Debug) << "textureUniformY_ = 0 "
+			     <<	" firstRed.x " << firstRed[0]
+			     <<	" firstRed.y " << firstRed[1]
+			     <<	" textureUniformSize_.width " << imgSize[0]
+			     <<	" textureUniformSize_.height " << imgSize[1]
+			     <<	" textureUniformStep_.x " << Step[0]
+			     <<	" textureUniformStep_.y " << Step[1]
+			     <<	" textureUniformStrideFactor_ " << Stride
+			     <<	" textureUniformProjMatrix_ " << textureUniformProjMatrix_;
+	return;
+}
+
+void DebayerEGL::debayerGPU(MappedFrameBuffer &in, MappedFrameBuffer &out, DebayerParams &params)
+{
+	/* Switch to our eGL context */
+	egl_.makeCurrent();
+
+	/* Create a standard input texture from the first mapped input plane */
+	egl_.createTexture2D(eglImageBayerIn_, glFormat_, inputConfig_.stride / bytesPerPixel_, height_, in.planes()[0].data());
+
+	/* Render to texture: the output is set up from the output plane's fd */
+	egl_.createOutputDMABufTexture2D(eglImageBayerOut_, out.getPlaneFD(0));
+
+	/* Bayer params come either as a 3x3 CCM uniform or as R/G/B lookup textures */
+	if (ccmEnabled_) {
+		GLfloat ccm[9];
+		for (int row = 0; row < 3; row++)
+			for (int col = 0; col < 3; col++)
+				ccm[3 * row + col] = params.ccm[row][col];
+		glUniformMatrix3fv(ccmUniformDataIn_, 1, GL_FALSE, ccm);
+	} else {
+		egl_.createTexture2D(eglImageRedLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.red);
+		egl_.createTexture2D(eglImageGreenLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.green);
+		egl_.createTexture2D(eglImageBlueLookup_, GL_LUMINANCE, DebayerParams::kRGBLookupSize, 1, &params.blue);
+	}
+
+	/* Set attributes and uniforms, then draw the quad as a triangle fan */
+	setShaderVariableValues();
+	glViewport(0, 0, width_, height_);
+	glClear(GL_COLOR_BUFFER_BIT);
+	glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS);
+
+	/* Sync the output only when drawing raised no GL error */
+	const GLenum glStatus = glGetError();
+	if (glStatus == GL_NO_ERROR)
+		egl_.syncOutput();
+	else
+		LOG(eGL, Error) << "Drawing scene fail " << glStatus;
+
+	/* The output texture is torn down on both the success and error path */
+	egl_.destroyDMABufTexture(eglImageBayerOut_);
+}
+
+void DebayerEGL::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params)
+{
+	bench_.startFrame();
+
+	/* Filled by dmaSyncBegin() and cleared once the GPU pass is done */
+	std::vector<DmaSyncer> dmaSyncers;
+	dmaSyncBegin(dmaSyncers, input, output);
+
+	setParams(params);
+
+	/* Carry status, sequence and timestamp over from the input buffer */
+	FrameMetadata &dstMetadata = output->_d()->metadata();
+	const auto &srcMetadata = input->metadata();
+	dstMetadata.status = srcMetadata.status;
+	dstMetadata.sequence = srcMetadata.sequence;
+	dstMetadata.timestamp = srcMetadata.timestamp;
+
+	MappedFrameBuffer mappedIn(input, MappedFrameBuffer::MapFlag::Read);
+	MappedFrameBuffer mappedOut(output, MappedFrameBuffer::MapFlag::Write);
+	if (!(mappedIn.isValid() && mappedOut.isValid())) {
+		/* NOTE(review): this path emits neither buffer-ready signal - confirm intended */
+		LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
+		dstMetadata.status = FrameMetadata::FrameError;
+		return;
+	}
+
+	debayerGPU(mappedIn, mappedOut, params);
+	dmaSyncers.clear();
+	bench_.finishFrame();
+
+	dstMetadata.planes()[0].bytesused = mappedOut.planes()[0].size();
+
+	/* Calculate stats for the whole frame */
+	stats_->processFrame(frame, 0, input);
+
+	outputBufferReady.emit(output);
+	inputBufferReady.emit(input);
+}
+
+void DebayerEGL::stop()
+{
+	egl_.cleanUp(); /* Teardown is delegated entirely to the eGL helper */
+}
+
+SizeRange DebayerEGL::sizes(PixelFormat inputFormat, const Size &inputSize)
+{
+	const Size pattern = this->patternSize(inputFormat);
+	if (pattern.isNull())
+		return {};
+
+	/* No need for top/bottom border with a pattern height of 2 */
+	const unsigned int borderHeight = pattern.height == 2 ? 0 : pattern.height;
+
+	/*
+	 * Debayer interpolation keeps a border around the entire image, so the
+	 * input must leave room for the borders plus one pattern of output.
+	 */
+	const bool tooNarrow = inputSize.width < 3 * pattern.width;
+	const bool tooShort = inputSize.height < 2 * borderHeight + pattern.height;
+	if (tooNarrow || tooShort) {
+		LOG(Debayer, Warning)
+			<< "Input format size too small: " << inputSize.toString();
+		return {};
+	}
+
+	/* Minimum is one pattern; maximum is the input less borders, masked by pattern size */
+	const Size minOutput(pattern.width, pattern.height);
+	const Size maxOutput((inputSize.width - 2 * pattern.width) & ~(pattern.width - 1),
+			     (inputSize.height - 2 * borderHeight) & ~(pattern.height - 1));
+
+	return SizeRange(minOutput, maxOutput, pattern.width, pattern.height);
+}
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h
new file mode 100644
index 0000000000000000000000000000000000000000..ecb22fcb7f3a7d74c3a605a5351ea5871df24f5d
--- /dev/null
+++ b/src/libcamera/software_isp/debayer_egl.h
@@ -0,0 +1,171 @@ 
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2025, Bryan O'Donoghue.
+ *
+ * Authors:
+ * Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+ *
+ * debayer_egl.h - EGL debayer header
+ */
+
+#pragma once
+
+#include <memory>
+#include <stdint.h>
+#include <vector>
+
+#define GL_GLEXT_PROTOTYPES
+#define EGL_EGLEXT_PROTOTYPES
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+#include <GLES3/gl32.h>
+
+#include <libcamera/base/object.h>
+
+#include "debayer.h"
+
+#include "libcamera/internal/bayer_format.h"
+#include "libcamera/internal/egl.h"
+#include "libcamera/internal/framebuffer.h"
+#include "libcamera/internal/mapped_framebuffer.h"
+#include "libcamera/internal/software_isp/benchmark.h"
+#include "libcamera/internal/software_isp/swstats_cpu.h"
+
+namespace libcamera {
+
+#define DEBAYER_EGL_MIN_SIMPLE_RGB_GAIN_TEXTURE_UNITS 4
+#define DEBAYER_OPENGL_COORDS 4
+
+/**
+ * \class DebayerEGL
+ * \brief Class for debayering using an EGL Shader
+ *
+ * Implements an EGL shader based debayering solution.
+ */
+class DebayerEGL : public Debayer
+{
+public:
+	/**
+	 * \brief Constructs a DebayerEGL object.
+	 * \param[in] stats Pointer to the stats object to use.
+	 */
+	DebayerEGL(std::unique_ptr<SwStatsCpu> stats);
+	~DebayerEGL();
+
+	/*
+	 * Setup the Debayer object according to the passed in parameters.
+	 * Return 0 on success, a negative errno value on failure
+	 * (unsupported parameters).
+	 */
+	int configure(const StreamConfiguration &inputCfg,
+		      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
+		      bool ccmEnabled);
+
+	/*
+	 * Get width and height at which the bayer-pattern repeats.
+	 * Return pattern-size or an empty Size for an unsupported inputFormat.
+	 */
+	Size patternSize(PixelFormat inputFormat);
+
+	std::vector<PixelFormat> formats(PixelFormat input);
+	std::tuple<unsigned int, unsigned int> strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
+
+	void process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, DebayerParams params);
+	void stop();
+
+	/**
+	 * \brief Get the file descriptor for the statistics.
+	 *
+	 * \return the file descriptor pointing to the statistics.
+	 */
+	const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
+
+	/**
+	 * \brief Get the output frame size.
+	 *
+	 * \return The output frame size.
+	 */
+	unsigned int frameSize() { return outputConfig_.frameSize; }
+
+	SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
+
+private:
+	static int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
+	static int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
+	int setupStandardBayerOrder(BayerFormat::Order order);
+	void pushEnv(std::vector<std::string> &shaderEnv, const char *str);
+	int initBayerShaders(PixelFormat inputFormat, PixelFormat outputFormat);
+	int initEGLContext();
+	int generateTextures();
+	int compileShaderProgram(GLuint &shaderId, GLenum shaderType,
+				 unsigned char *shaderData, int shaderDataLen,
+				 std::vector<std::string> shaderEnv);
+	int linkShaderProgram(void);
+	int getShaderVariableLocations();
+	void setShaderVariableValues(void);
+	void configureTexture(GLuint &texture);
+	void debayerGPU(MappedFrameBuffer &in, MappedFrameBuffer &out, DebayerParams &params);
+
+	/* Shader program identifiers */
+	GLuint vertexShaderId_;
+	GLuint fragmentShaderId_;
+	GLuint programId_;
+	enum { /* NOTE(review): no use of these indices is visible in this part of the patch - confirm */
+		BAYER_INPUT_INDEX = 0,
+		BAYER_OUTPUT_INDEX,
+		BAYER_BUF_NUM,
+	};
+
+	/* Texture objects for the raw Bayer input and the rendered output */
+	eGLImage *eglImageBayerIn_;
+	eGLImage *eglImageBayerOut_;
+
+	eGLImage *eglImageRedLookup_; /* R/G/B lookup textures, created per frame when CCM is disabled */
+	eGLImage *eglImageGreenLookup_;
+	eGLImage *eglImageBlueLookup_;
+
+	/* Shader parameters: first-red position plus attribute and uniform handles */
+	float firstRed_x_;
+	float firstRed_y_;
+	GLint attributeVertex_;
+	GLint attributeTexture_;
+	GLint textureUniformStep_;
+	GLint textureUniformSize_;
+	GLint textureUniformStrideFactor_;
+	GLint textureUniformBayerFirstRed_;
+	GLint textureUniformProjMatrix_;
+
+	GLint textureUniformBayerDataIn_;
+
+	/* Sampler uniforms for the R/G/B lookup textures; setShaderVariableValues() sets them only when CCM is disabled */
+	GLint textureUniformRedLookupDataIn_;
+	GLint textureUniformGreenLookupDataIn_;
+	GLint textureUniformBlueLookupDataIn_;
+
+	/* Per-frame CCM, passed to the shader as a 3 x 3 float matrix uniform */
+	GLint ccmUniformDataIn_;
+	bool ccmEnabled_;
+
+	Rectangle window_;
+	std::unique_ptr<SwStatsCpu> stats_;
+	eGL egl_;
+	GBM gbmSurface_;
+	uint32_t width_;
+	uint32_t height_;
+	GLint glFormat_;
+	unsigned int bytesPerPixel_;
+	GLfloat vcoordinates[DEBAYER_OPENGL_COORDS][2] = { /* quad corner positions, -1..+1 on both axes */
+		{ -1.0f, -1.0f },
+		{ -1.0f, +1.0f },
+		{ +1.0f, +1.0f },
+		{ +1.0f, -1.0f },
+	};
+	GLfloat tcoordinates[DEBAYER_OPENGL_COORDS][2] = { /* texture coordinates, same corner order as vcoordinates */
+		{ 0.0f, 0.0f },
+		{ 0.0f, 1.0f },
+		{ 1.0f, 1.0f },
+		{ 1.0f, 0.0f },
+	};
+};
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
index 59fa5f02a0a5620fa524d8a171332f04e0f769b2..c61ac7d59d37c5ef49ac67fe74cbcda3d89c30cb 100644
--- a/src/libcamera/software_isp/meson.build
+++ b/src/libcamera/software_isp/meson.build
@@ -2,6 +2,7 @@ 
 
 softisp_enabled = pipelines.contains('simple')
 summary({'SoftISP support' : softisp_enabled}, section : 'Configuration')
+summary({'SoftISP GPU acceleration' : gles_headless_enabled}, section : 'Configuration')
 
 if not softisp_enabled
     subdir_done()
@@ -14,3 +15,10 @@  libcamera_internal_sources += files([
     'software_isp.cpp',
     'swstats_cpu.cpp',
 ])
+
+if softisp_enabled and gles_headless_enabled  # softisp_enabled is already true here: subdir_done() ran otherwise
+    config_h.set('HAVE_DEBAYER_EGL', 1)
+    libcamera_internal_sources += files([
+        'debayer_egl.cpp',
+    ])
+endif