[v3,8/8] libcamera: software_isp: debayer_egl: Implement input/output frame caching mechanism
diff mbox series

Message ID 20260626113325.3218045-9-bryan.odonoghue@linaro.org
State New
Headers show
Series
  • libcamera: software_isp: gpu: Add go faster stripes
Related show

Commit Message

Bryan O'Donoghue June 26, 2026, 11:33 a.m. UTC
Implement a texture caching mechanism for both input and output frames and
for both types of input frame.

The before/after on a Qualcomm x1e is:

9.737ms per frame
5.691ms per frame

The before/after on a Qualcomm sm8250 is:

21.710ms per frame
17.336ms per frame

for i in {1..20}; do
cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60
done

Interestingly, there appears to be an absolute ~4.x ms per-frame uplift,
rather than the proportional one that intuition might suggest.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
---
 src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++-----
 src/libcamera/software_isp/debayer_egl.h   | 10 ++-
 2 files changed, 75 insertions(+), 22 deletions(-)

Comments

Robert Mader June 26, 2026, 12:29 p.m. UTC | #1
On 26.06.26 13:33, Bryan O'Donoghue wrote:
> Implement a texture caching mechanism for both input and output frames and
> for both types of input frame.
>
> The before/after on a Qualcomm x1e is:
>
> 9.737ms per frame
> 5.691ms per frame
>
> The before/after on a Qualcomm sm8250 is:
>
> 21.710ms per frame
> 17.336ms per frame
>
> for i in {1..20} do
> cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60
>
> Interestingly there appears to be an absolute ~ 4.x ms per frame uplift as
> opposed to what intuition might suggest a proportional.
>
> Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
> ---
>   src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++-----
>   src/libcamera/software_isp/debayer_egl.h   | 10 ++-
>   2 files changed, 75 insertions(+), 22 deletions(-)
>
> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
> index 53bb67c17..fc37f0b75 100644
> --- a/src/libcamera/software_isp/debayer_egl.cpp
> +++ b/src/libcamera/software_isp/debayer_egl.cpp
> @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg,
>   	 */
>   	stats_->setWindow(Rectangle(window_.size()));
>   
> +	inputBufferCount_ = inputCfg.bufferCount;
> +	outputBufferCount_ = outputCfg.bufferCount;
> +
>   	return 0;
>   }
>   
> @@ -514,34 +517,84 @@ void DebayerEGL::setShaderVariableValues(eGLImage &eglImageIn, const DebayerPara
>   	return;
>   }
>   
> -int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
> +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
>   {
> -	/* eGL context switch */
> -	egl_.makeCurrent();
> +	auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get());
> +	if (cache_miss) {
> +		if (eglImageBayerIn_.size() > inputBufferCount_) {
> +			LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted";
> +			return nullptr;
> +		}
> +		input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_,
> +								 height_, inputConfig_.stride, GL_TEXTURE0, 0);
> +	}
> +	eGLImage *eglImageIn = input_cache->second.get();
>   
>   	/* Try to create texture for input buffer via dmabuf import */
> -	if (use_dmabuf_) {
> -		if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) {
> +	if (use_dmabuf_ && cache_miss) {
> +		if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) {
>   			use_dmabuf_ = false;
>   			LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload";
>   		}
>   	}
>   
> -	/* Otherwise create texture for input buffer via upload from CPU */
> -	if (!use_dmabuf_) {
> +	if (use_dmabuf_) {
> +		/* Cache hit using dmabuf activate and bind */
> +		if (!cache_miss)
> +			egl_.activateBindTexture(*eglImageIn);
> +	} else {
> +		/* Otherwise create texture for input buffer via upload from CPU */
>   		inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read);
>   		inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read);
>   		if (!inMapped->value().isValid()) {
>   			LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
> -			return -ENODEV;
> +			return nullptr;
>   		}
> -		egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data());
> +		if (cache_miss)
> +			egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
> +		else
> +			egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
>   	}
>   
> -	/* Generate the output render framebuffer as render to texture */
> -	egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get());
> +	return eglImageIn;
> +}
> +
> +eGLImage *DebayerEGL::getCachedOutputFrameBuffer(FrameBuffer *output)
> +{
> +	auto [output_cache, cache_miss] = eglImageBayerOut_.try_emplace(output->planes()[0].fd.get());
> +	if (cache_miss) {
> +		if (eglImageBayerOut_.size() > outputBufferCount_) {
> +			LOG(Debayer, Error) << "Output buffer count " << outputBufferCount_ << " exhaustion";
> +			return nullptr;
> +		}
> +		output_cache->second = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width,
> +								  outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1);
> +		egl_.createOutputDMABufTexture2D(*output_cache->second, output->planes()[0].fd.get());
> +	}
Answering my own question from the previous review: IIUC we indeed
don't need to call "egl_.activateBindTexture(*eglImageBayerOut_);" in
the "else" case here; it's apparently only required when setting up the
texture. Hope that's correct.
> +	eGLImage *eglImageOut = output_cache->second.get();
> +
> +	return eglImageOut;
> +}
> +
> +int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
> +{
> +	eGLImage *eglImageIn;
> +	eGLImage *eglImageOut;
> +
> +	/* eGL context switch */
> +	egl_.makeCurrent();
> +
> +	eglImageIn = getCachedInputFrameBuffer(input, inMapped, inDmaSyncer);
> +	if (!eglImageIn)
> +		return -ENOMEM;
> +
> +	eglImageOut = getCachedOutputFrameBuffer(output);
> +	if (!eglImageOut)
> +		return -ENOMEM;
> +
> +	egl_.attachTextureToFBO(*eglImageOut);
> +	setShaderVariableValues(*eglImageIn, params);
>   
> -	setShaderVariableValues(*eglImageBayerIn_, params);
>   	glViewport(0, 0, width_, height_);
>   	glClear(GL_COLOR_BUFFER_BIT);
>   	glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS);
> @@ -623,19 +676,13 @@ int DebayerEGL::start()
>   	if (initBayerShaders(inputPixelFormat_, outputPixelFormat_))
>   		return -EINVAL;
>   
> -	/* Raw bayer input as texture */
> -	eglImageBayerIn_ = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, height_, inputConfig_.stride, GL_TEXTURE0, 0);
> -
> -	/* Texture we will render to */
> -	eglImageBayerOut_ = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1);
> -
>   	return 0;
>   }
>   
>   void DebayerEGL::stop()
>   {
> -	eglImageBayerOut_.reset();
> -	eglImageBayerIn_.reset();
> +	eglImageBayerOut_.clear();
> +	eglImageBayerIn_.clear();
Should we also reset inputBufferCount_ and outputBufferCount_ here, just 
to ensure the state is clean and can't accidentally be used in the future?
>   
>   	if (programId_)
>   		glDeleteProgram(programId_);
> diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h
> index d8509e9f2..ddb3ef378 100644
> --- a/src/libcamera/software_isp/debayer_egl.h
> +++ b/src/libcamera/software_isp/debayer_egl.h
> @@ -22,6 +22,7 @@
>   #include "libcamera/internal/mapped_framebuffer.h"
>   #include "libcamera/internal/software_isp/benchmark.h"
>   #include "libcamera/internal/software_isp/swstats_cpu.h"
> +#include "libcamera/internal/v4l2_videodevice.h"
Should be removed again I think?
>   
>   #include <EGL/egl.h>
>   #include <EGL/eglext.h>
> @@ -70,14 +71,19 @@ private:
>   
>   	bool use_dmabuf_;
>   
> +	eGLImage *getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer);
> +	eGLImage *getCachedOutputFrameBuffer(FrameBuffer *output);
> +
>   	/* Shader program identifiers */
>   	GLuint vertexShaderId_ = 0;
>   	GLuint fragmentShaderId_ = 0;
>   	GLuint programId_ = 0;
>   
>   	/* Pointer to object representing input texture */
> -	std::unique_ptr<eGLImage> eglImageBayerIn_;
> -	std::unique_ptr<eGLImage> eglImageBayerOut_;
> +	std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerIn_;
> +	std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerOut_;
> +	unsigned int inputBufferCount_;
> +	unsigned int outputBufferCount_;
>   
>   	/* Shader parameters */
>   	float firstRed_x_;

This approach LGTM - with the two minor comments addressed:

Reviewed-by: Robert Mader <robert.mader@collabora.com>
Barnabás Pőcze June 26, 2026, 1:19 p.m. UTC | #2
2026. 06. 26. 13:33 keltezéssel, Bryan O'Donoghue írta:
> Implement a texture caching mechanism for both input and output frames and
> for both types of input frame.
> 
> The before/after on a Qualcomm x1e is:
> 
> 9.737ms per frame
> 5.691ms per frame
> 
> The before/after on a Qualcomm sm8250 is:
> 
> 21.710ms per frame
> 17.336ms per frame
> 
> for i in {1..20} do
> cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60
> 
> Interestingly there appears to be an absolute ~ 4.x ms per frame uplift as
> opposed to what intuition might suggest a proportional.
> 
> Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
> ---
>   src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++-----
>   src/libcamera/software_isp/debayer_egl.h   | 10 ++-
>   2 files changed, 75 insertions(+), 22 deletions(-)
> 
> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
> index 53bb67c17..fc37f0b75 100644
> --- a/src/libcamera/software_isp/debayer_egl.cpp
> +++ b/src/libcamera/software_isp/debayer_egl.cpp
> @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg,
>   	 */
>   	stats_->setWindow(Rectangle(window_.size()));
> 
> +	inputBufferCount_ = inputCfg.bufferCount;
> +	outputBufferCount_ = outputCfg.bufferCount;
> +
>   	return 0;
>   }
> 
> @@ -514,34 +517,84 @@ void DebayerEGL::setShaderVariableValues(eGLImage &eglImageIn, const DebayerPara
>   	return;
>   }
> 
> -int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
> +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
>   {
> -	/* eGL context switch */
> -	egl_.makeCurrent();
> +	auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get());
> +	if (cache_miss) {
> +		if (eglImageBayerIn_.size() > inputBufferCount_) {
> +			LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted";

   eglImageBayerIn_.erase(input_cache);

otherwise the stale entry (nullptr) might cause issues later.


> +			return nullptr;
> +		}
> +		input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_,
> +								 height_, inputConfig_.stride, GL_TEXTURE0, 0);
> +	}
> +	eGLImage *eglImageIn = input_cache->second.get();
> 
>   	/* Try to create texture for input buffer via dmabuf import */
> -	if (use_dmabuf_) {
> -		if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) {
> +	if (use_dmabuf_ && cache_miss) {
> +		if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) {
>   			use_dmabuf_ = false;
>   			LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload";
>   		}
>   	}
> 
> -	/* Otherwise create texture for input buffer via upload from CPU */
> -	if (!use_dmabuf_) {
> +	if (use_dmabuf_) {
> +		/* Cache hit using dmabuf activate and bind */
> +		if (!cache_miss)
> +			egl_.activateBindTexture(*eglImageIn);
> +	} else {
> +		/* Otherwise create texture for input buffer via upload from CPU */
>   		inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read);
>   		inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read);
>   		if (!inMapped->value().isValid()) {
>   			LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
> -			return -ENODEV;
> +			return nullptr;
>   		}
> -		egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data());
> +		if (cache_miss)
> +			egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
> +		else
> +			egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
>   	}
> 
> -	/* Generate the output render framebuffer as render to texture */
> -	egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get());
> +	return eglImageIn;
> +}
> +
> +eGLImage *DebayerEGL::getCachedOutputFrameBuffer(FrameBuffer *output)
> +{
> +	auto [output_cache, cache_miss] = eglImageBayerOut_.try_emplace(output->planes()[0].fd.get());
> +	if (cache_miss) {
> +		if (eglImageBayerOut_.size() > outputBufferCount_) {
> +			LOG(Debayer, Error) << "Output buffer count " << outputBufferCount_ << " exhaustion";

   eglImageBayerOut_.erase(output_cache);

for the same reason as above.


> +			return nullptr;
> +		}
> +		output_cache->second = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width,
> +								  outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1);
> +		egl_.createOutputDMABufTexture2D(*output_cache->second, output->planes()[0].fd.get());
> +	}
> +	eGLImage *eglImageOut = output_cache->second.get();
> +
> +	return eglImageOut;

   return output_cache->second.get();

?


> +}
> +
> +int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
> +{
> +	eGLImage *eglImageIn;
> +	eGLImage *eglImageOut;
> +
> +	/* eGL context switch */
> +	egl_.makeCurrent();
> +
> +	eglImageIn = getCachedInputFrameBuffer(input, inMapped, inDmaSyncer);
> +	if (!eglImageIn)
> +		return -ENOMEM;
> +
> +	eglImageOut = getCachedOutputFrameBuffer(output);
> +	if (!eglImageOut)
> +		return -ENOMEM;
> +
> +	egl_.attachTextureToFBO(*eglImageOut);
> +	setShaderVariableValues(*eglImageIn, params);
> 
> -	setShaderVariableValues(*eglImageBayerIn_, params);
>   	glViewport(0, 0, width_, height_);
>   	glClear(GL_COLOR_BUFFER_BIT);
>   	glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS);
> @@ -623,19 +676,13 @@ int DebayerEGL::start()
>   	if (initBayerShaders(inputPixelFormat_, outputPixelFormat_))
>   		return -EINVAL;
> 
> -	/* Raw bayer input as texture */
> -	eglImageBayerIn_ = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, height_, inputConfig_.stride, GL_TEXTURE0, 0);
> -
> -	/* Texture we will render to */
> -	eglImageBayerOut_ = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1);
> -
>   	return 0;
>   }
> 
>   void DebayerEGL::stop()
>   {
> -	eglImageBayerOut_.reset();
> -	eglImageBayerIn_.reset();
> +	eglImageBayerOut_.clear();
> +	eglImageBayerIn_.clear();
> 
>   	if (programId_)
>   		glDeleteProgram(programId_);
> diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h
> index d8509e9f2..ddb3ef378 100644
> --- a/src/libcamera/software_isp/debayer_egl.h
> +++ b/src/libcamera/software_isp/debayer_egl.h
> @@ -22,6 +22,7 @@
>   #include "libcamera/internal/mapped_framebuffer.h"
>   #include "libcamera/internal/software_isp/benchmark.h"
>   #include "libcamera/internal/software_isp/swstats_cpu.h"
> +#include "libcamera/internal/v4l2_videodevice.h"
> 
>   #include <EGL/egl.h>
>   #include <EGL/eglext.h>
> @@ -70,14 +71,19 @@ private:
> 
>   	bool use_dmabuf_;
> 
> +	eGLImage *getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer);
> +	eGLImage *getCachedOutputFrameBuffer(FrameBuffer *output);
> +
>   	/* Shader program identifiers */
>   	GLuint vertexShaderId_ = 0;
>   	GLuint fragmentShaderId_ = 0;
>   	GLuint programId_ = 0;
> 
>   	/* Pointer to object representing input texture */
> -	std::unique_ptr<eGLImage> eglImageBayerIn_;
> -	std::unique_ptr<eGLImage> eglImageBayerOut_;
> +	std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerIn_;
> +	std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerOut_;

I think we should use `SharedFD` here to avoid any fd reuse issues. I have just
realized that `V4L2BufferCache` does not use `SharedFD` either, and I think
that is kind of an issue. Nowadays every user just uses `FrameBufferAllocator`
with long-lived `FrameBuffer`s, but the libcamera API does not require anything
like that.

Maybe this should be discussed further: what the exact semantics should be, etc.

In any case, this should be an easy change, just needs this as well:

diff --git a/include/libcamera/base/shared_fd.h b/include/libcamera/base/shared_fd.h
index 61fe11c1d6..cec1f6036c 100644
--- a/include/libcamera/base/shared_fd.h
+++ b/include/libcamera/base/shared_fd.h
@@ -7,6 +7,7 @@
  
  #pragma once
  
+#include <functional>
  #include <memory>
  
  namespace libcamera {
@@ -57,3 +58,11 @@ static inline bool operator!=(const SharedFD &lhs, const SharedFD &rhs)
  }
  
  } /* namespace libcamera */
+
+template<>
+struct std::hash<libcamera::SharedFD> : private std::hash<int> {
+       auto operator()(const libcamera::SharedFD &x) const noexcept
+       {
+               return std::hash<int>::operator()(x.get());
+       }
+};


> +	unsigned int inputBufferCount_;
> +	unsigned int outputBufferCount_;

Something like `xyzBufferCacheMaxSize_` / `maxXyzBufferCacheSize_` is more to the point in my opinion.

> 
>   	/* Shader parameters */
>   	float firstRed_x_;
> --
> 2.54.0
>
Barnabás Pőcze June 26, 2026, 1:22 p.m. UTC | #3
2026. 06. 26. 15:19 keltezéssel, Barnabás Pőcze írta:
> 2026. 06. 26. 13:33 keltezéssel, Bryan O'Donoghue írta:
>> Implement a texture caching mechanism for both input and output frames and
>> for both types of input frame.
>>
>> The before/after on a Qualcomm x1e is:
>>
>> 9.737ms per frame
>> 5.691ms per frame
>>
>> The before/after on a Qualcomm sm8250 is:
>>
>> 21.710ms per frame
>> 17.336ms per frame
>>
>> for i in {1..20} do
>> cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60
>>
>> Interestingly there appears to be an absolute ~ 4.x ms per frame uplift as
>> opposed to what intuition might suggest a proportional.
>>
>> Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
>> ---
>>    src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++-----
>>    src/libcamera/software_isp/debayer_egl.h   | 10 ++-
>>    2 files changed, 75 insertions(+), 22 deletions(-)
>>
>> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
>> index 53bb67c17..fc37f0b75 100644
>> --- a/src/libcamera/software_isp/debayer_egl.cpp
>> +++ b/src/libcamera/software_isp/debayer_egl.cpp
>> @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg,
> [...]
>> +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
>>    {
>> -	/* eGL context switch */
>> -	egl_.makeCurrent();
>> +	auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get());
>> +	if (cache_miss) {
>> +		if (eglImageBayerIn_.size() > inputBufferCount_) {
>> +			LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted";

And I am not sure this should be an error, maybe "debug" at most.


> 
>     eglImageBayerIn_.erase(input_cache);
> 
> otherwise the stale entry (nullptr) might cause issues later.
> 
> 
>> +			return nullptr;
>> +		}
>> +		input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_,
>> +								 height_, inputConfig_.stride, GL_TEXTURE0, 0);
>> +	}
>> +	eGLImage *eglImageIn = input_cache->second.get();
>>
>>    	/* Try to create texture for input buffer via dmabuf import */
>> -	if (use_dmabuf_) {
>> -		if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) {
>> +	if (use_dmabuf_ && cache_miss) {
>> +		if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) {
>>    			use_dmabuf_ = false;
>>    			LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload";
>>    		}
>>    	}
>>
>> -	/* Otherwise create texture for input buffer via upload from CPU */
>> -	if (!use_dmabuf_) {
>> +	if (use_dmabuf_) {
>> +		/* Cache hit using dmabuf activate and bind */
>> +		if (!cache_miss)
>> +			egl_.activateBindTexture(*eglImageIn);
>> +	} else {
>> +		/* Otherwise create texture for input buffer via upload from CPU */
>>    		inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read);
>>    		inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read);
>>    		if (!inMapped->value().isValid()) {
>>    			LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
>> -			return -ENODEV;
>> +			return nullptr;
>>    		}
>> -		egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data());
>> +		if (cache_miss)
>> +			egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
>> +		else
>> +			egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
>>    	}
>>
>> -	/* Generate the output render framebuffer as render to texture */
>> -	egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get());
>> +	return eglImageIn;
>> +}
>> [...]
Bryan O'Donoghue June 26, 2026, 2:10 p.m. UTC | #4
On 26/06/2026 14:19, Barnabás Pőcze wrote:
> diff --git a/include/libcamera/base/shared_fd.h b/include/libcamera/ 
> base/shared_fd.h
> index 61fe11c1d6..cec1f6036c 100644
> --- a/include/libcamera/base/shared_fd.h
> +++ b/include/libcamera/base/shared_fd.h
> @@ -7,6 +7,7 @@
> 
>   #pragma once
> 
> +#include <functional>
>   #include <memory>
> 
>   namespace libcamera {
> @@ -57,3 +58,11 @@ static inline bool operator!=(const SharedFD &lhs, 
> const SharedFD &rhs)
>   }
> 
>   } /* namespace libcamera */
> +
> +template<>
> +struct std::hash<libcamera::SharedFD> : private std::hash<int> {
> +       auto operator()(const libcamera::SharedFD &x) const noexcept
> +       {
> +               return std::hash<int>::operator()(x.get());
> +       }
> +};

Do you want to make that into a patch yourself, or should I pick it up
for you?

---
bod

Patch
diff mbox series

diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp
index 53bb67c17..fc37f0b75 100644
--- a/src/libcamera/software_isp/debayer_egl.cpp
+++ b/src/libcamera/software_isp/debayer_egl.cpp
@@ -355,6 +355,9 @@  int DebayerEGL::configure(const StreamConfiguration &inputCfg,
 	 */
 	stats_->setWindow(Rectangle(window_.size()));
 
+	inputBufferCount_ = inputCfg.bufferCount;
+	outputBufferCount_ = outputCfg.bufferCount;
+
 	return 0;
 }
 
@@ -514,34 +517,84 @@  void DebayerEGL::setShaderVariableValues(eGLImage &eglImageIn, const DebayerPara
 	return;
 }
 
-int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
+eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
 {
-	/* eGL context switch */
-	egl_.makeCurrent();
+	auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get());
+	if (cache_miss) {
+		if (eglImageBayerIn_.size() > inputBufferCount_) {
+			LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted";
+			return nullptr;
+		}
+		input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_,
+								 height_, inputConfig_.stride, GL_TEXTURE0, 0);
+	}
+	eGLImage *eglImageIn = input_cache->second.get();
 
 	/* Try to create texture for input buffer via dmabuf import */
-	if (use_dmabuf_) {
-		if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) {
+	if (use_dmabuf_ && cache_miss) {
+		if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) {
 			use_dmabuf_ = false;
 			LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload";
 		}
 	}
 
-	/* Otherwise create texture for input buffer via upload from CPU */
-	if (!use_dmabuf_) {
+	if (use_dmabuf_) {
+		/* Cache hit using dmabuf activate and bind */
+		if (!cache_miss)
+			egl_.activateBindTexture(*eglImageIn);
+	} else {
+		/* Otherwise create texture for input buffer via upload from CPU */
 		inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read);
 		inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read);
 		if (!inMapped->value().isValid()) {
 			LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
-			return -ENODEV;
+			return nullptr;
 		}
-		egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data());
+		if (cache_miss)
+			egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
+		else
+			egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data());
 	}
 
-	/* Generate the output render framebuffer as render to texture */
-	egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get());
+	return eglImageIn;
+}
+
+eGLImage *DebayerEGL::getCachedOutputFrameBuffer(FrameBuffer *output)
+{
+	auto [output_cache, cache_miss] = eglImageBayerOut_.try_emplace(output->planes()[0].fd.get());
+	if (cache_miss) {
+		if (eglImageBayerOut_.size() > outputBufferCount_) {
+			LOG(Debayer, Error) << "Output buffer count " << outputBufferCount_ << " exhaustion";
+			return nullptr;
+		}
+		output_cache->second = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width,
+								  outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1);
+		egl_.createOutputDMABufTexture2D(*output_cache->second, output->planes()[0].fd.get());
+	}
+	eGLImage *eglImageOut = output_cache->second.get();
+
+	return eglImageOut;
+}
+
+int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer)
+{
+	eGLImage *eglImageIn;
+	eGLImage *eglImageOut;
+
+	/* eGL context switch */
+	egl_.makeCurrent();
+
+	eglImageIn = getCachedInputFrameBuffer(input, inMapped, inDmaSyncer);
+	if (!eglImageIn)
+		return -ENOMEM;
+
+	eglImageOut = getCachedOutputFrameBuffer(output);
+	if (!eglImageOut)
+		return -ENOMEM;
+
+	egl_.attachTextureToFBO(*eglImageOut);
+	setShaderVariableValues(*eglImageIn, params);
 
-	setShaderVariableValues(*eglImageBayerIn_, params);
 	glViewport(0, 0, width_, height_);
 	glClear(GL_COLOR_BUFFER_BIT);
 	glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS);
@@ -623,19 +676,13 @@  int DebayerEGL::start()
 	if (initBayerShaders(inputPixelFormat_, outputPixelFormat_))
 		return -EINVAL;
 
-	/* Raw bayer input as texture */
-	eglImageBayerIn_ = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, height_, inputConfig_.stride, GL_TEXTURE0, 0);
-
-	/* Texture we will render to */
-	eglImageBayerOut_ = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1);
-
 	return 0;
 }
 
 void DebayerEGL::stop()
 {
-	eglImageBayerOut_.reset();
-	eglImageBayerIn_.reset();
+	eglImageBayerOut_.clear();
+	eglImageBayerIn_.clear();
 
 	if (programId_)
 		glDeleteProgram(programId_);
diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h
index d8509e9f2..ddb3ef378 100644
--- a/src/libcamera/software_isp/debayer_egl.h
+++ b/src/libcamera/software_isp/debayer_egl.h
@@ -22,6 +22,7 @@ 
 #include "libcamera/internal/mapped_framebuffer.h"
 #include "libcamera/internal/software_isp/benchmark.h"
 #include "libcamera/internal/software_isp/swstats_cpu.h"
+#include "libcamera/internal/v4l2_videodevice.h"
 
 #include <EGL/egl.h>
 #include <EGL/eglext.h>
@@ -70,14 +71,19 @@  private:
 
 	bool use_dmabuf_;
 
+	eGLImage *getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer);
+	eGLImage *getCachedOutputFrameBuffer(FrameBuffer *output);
+
 	/* Shader program identifiers */
 	GLuint vertexShaderId_ = 0;
 	GLuint fragmentShaderId_ = 0;
 	GLuint programId_ = 0;
 
 	/* Pointer to object representing input texture */
-	std::unique_ptr<eGLImage> eglImageBayerIn_;
-	std::unique_ptr<eGLImage> eglImageBayerOut_;
+	std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerIn_;
+	std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerOut_;
+	unsigned int inputBufferCount_;
+	unsigned int outputBufferCount_;
 
 	/* Shader parameters */
 	float firstRed_x_;