| Message ID | 20260626113325.3218045-9-bryan.odonoghue@linaro.org |
|---|---|
| State | New |
| Headers | show |
| Series |
|
| Related | show |
On 26.06.26 13:33, Bryan O'Donoghue wrote: > Implement a texture caching mechanism for both input and output frames and > for both types of input frame. > > The before/after on a Qualcomm x1e is: > > 9.737ms per frame > 5.691ms per frame > > The before/after on a Qualcomm sm8250 is: > > 21.710ms per frame > 17.336ms per frame > > for i in {1..20} do > cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60 > > Interestingly there appears to be an absolute ~ 4.x ms per frame uplift as > opposed to what intuition might suggest a proportional. > > Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> > --- > src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++----- > src/libcamera/software_isp/debayer_egl.h | 10 ++- > 2 files changed, 75 insertions(+), 22 deletions(-) > > diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp > index 53bb67c17..fc37f0b75 100644 > --- a/src/libcamera/software_isp/debayer_egl.cpp > +++ b/src/libcamera/software_isp/debayer_egl.cpp > @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg, > */ > stats_->setWindow(Rectangle(window_.size())); > > + inputBufferCount_ = inputCfg.bufferCount; > + outputBufferCount_ = outputCfg.bufferCount; > + > return 0; > } > > @@ -514,34 +517,84 @@ void DebayerEGL::setShaderVariableValues(eGLImage &eglImageIn, const DebayerPara > return; > } > > -int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) > +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) > { > - /* eGL context switch */ > - egl_.makeCurrent(); > + auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get()); > + if (cache_miss) { > + if (eglImageBayerIn_.size() > 
inputBufferCount_) { > + LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted"; > + return nullptr; > + } > + input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, > + height_, inputConfig_.stride, GL_TEXTURE0, 0); > + } > + eGLImage *eglImageIn = input_cache->second.get(); > > /* Try to create texture for input buffer via dmabuf import */ > - if (use_dmabuf_) { > - if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) { > + if (use_dmabuf_ && cache_miss) { > + if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) { > use_dmabuf_ = false; > LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload"; > } > } > > - /* Otherwise create texture for input buffer via upload from CPU */ > - if (!use_dmabuf_) { > + if (use_dmabuf_) { > + /* Cache hit using dmabuf activate and bind */ > + if (!cache_miss) > + egl_.activateBindTexture(*eglImageIn); > + } else { > + /* Otherwise create texture for input buffer via upload from CPU */ > inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read); > inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read); > if (!inMapped->value().isValid()) { > LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; > - return -ENODEV; > + return nullptr; > } > - egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data()); > + if (cache_miss) > + egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); > + else > + egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); > } > > - /* Generate the output render framebuffer as render to texture */ > - egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get()); > + return eglImageIn; > +} > + > +eGLImage *DebayerEGL::getCachedOutputFrameBuffer(FrameBuffer *output) > +{ > + auto [output_cache, cache_miss] = 
eglImageBayerOut_.try_emplace(output->planes()[0].fd.get()); > + if (cache_miss) { > + if (eglImageBayerOut_.size() > outputBufferCount_) { > + LOG(Debayer, Error) << "Output buffer count " << outputBufferCount_ << " exhaustion"; > + return nullptr; > + } > + output_cache->second = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, > + outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1); > + egl_.createOutputDMABufTexture2D(*output_cache->second, output->planes()[0].fd.get()); > + } Answering my own question from the previous review: IIUC we indeed don't need to call "egl_.activateBindTexture(*eglImageBayerOut_);" in the "else" case here; it's apparently only required when setting up the texture. Hope that's correct. > + eGLImage *eglImageOut = output_cache->second.get(); > + > + return eglImageOut; > +} > + > +int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) > +{ > + eGLImage *eglImageIn; > + eGLImage *eglImageOut; > + > + /* eGL context switch */ > + egl_.makeCurrent(); > + > + eglImageIn = getCachedInputFrameBuffer(input, inMapped, inDmaSyncer); > + if (!eglImageIn) > + return -ENOMEM; > + > + eglImageOut = getCachedOutputFrameBuffer(output); > + if (!eglImageOut) > + return -ENOMEM; > + > + egl_.attachTextureToFBO(*eglImageOut); > + setShaderVariableValues(*eglImageIn, params); > > - setShaderVariableValues(*eglImageBayerIn_, params); > glViewport(0, 0, width_, height_); > glClear(GL_COLOR_BUFFER_BIT); > glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS); > @@ -623,19 +676,13 @@ int DebayerEGL::start() > if (initBayerShaders(inputPixelFormat_, outputPixelFormat_)) > return -EINVAL; > > - /* Raw bayer input as texture */ > - eglImageBayerIn_ = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, height_, inputConfig_.stride, GL_TEXTURE0, 0); > - > - /* Texture we will render to */ > - 
eglImageBayerOut_ = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1); > - > return 0; > } > > void DebayerEGL::stop() > { > - eglImageBayerOut_.reset(); > - eglImageBayerIn_.reset(); > + eglImageBayerOut_.clear(); > + eglImageBayerIn_.clear(); Should we also reset inputBufferCount_ and outputBufferCount_ here, just to ensure the state is clean and can't accidentally be used in the future? > > if (programId_) > glDeleteProgram(programId_); > diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h > index d8509e9f2..ddb3ef378 100644 > --- a/src/libcamera/software_isp/debayer_egl.h > +++ b/src/libcamera/software_isp/debayer_egl.h > @@ -22,6 +22,7 @@ > #include "libcamera/internal/mapped_framebuffer.h" > #include "libcamera/internal/software_isp/benchmark.h" > #include "libcamera/internal/software_isp/swstats_cpu.h" > +#include "libcamera/internal/v4l2_videodevice.h" Should be removed again I think? > > #include <EGL/egl.h> > #include <EGL/eglext.h> > @@ -70,14 +71,19 @@ private: > > bool use_dmabuf_; > > + eGLImage *getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer); > + eGLImage *getCachedOutputFrameBuffer(FrameBuffer *output); > + > /* Shader program identifiers */ > GLuint vertexShaderId_ = 0; > GLuint fragmentShaderId_ = 0; > GLuint programId_ = 0; > > /* Pointer to object representing input texture */ > - std::unique_ptr<eGLImage> eglImageBayerIn_; > - std::unique_ptr<eGLImage> eglImageBayerOut_; > + std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerIn_; > + std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerOut_; > + unsigned int inputBufferCount_; > + unsigned int outputBufferCount_; > > /* Shader parameters */ > float firstRed_x_; This approach LGTM - with the two minor comments addressed: Reviewed-by: Robert Mader <robert.mader@collabora.com>
2026. 06. 26. 13:33 keltezéssel, Bryan O'Donoghue írta: > Implement a texture caching mechanism for both input and output frames and > for both types of input frame. > > The before/after on a Qualcomm x1e is: > > 9.737ms per frame > 5.691ms per frame > > The before/after on a Qualcomm sm8250 is: > > 21.710ms per frame > 17.336ms per frame > > for i in {1..20} do > cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60 > > Interestingly there appears to be an absolute ~ 4.x ms per frame uplift as > opposed to what intuition might suggest a proportional. > > Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> > --- > src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++----- > src/libcamera/software_isp/debayer_egl.h | 10 ++- > 2 files changed, 75 insertions(+), 22 deletions(-) > > diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp > index 53bb67c17..fc37f0b75 100644 > --- a/src/libcamera/software_isp/debayer_egl.cpp > +++ b/src/libcamera/software_isp/debayer_egl.cpp > @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg, > */ > stats_->setWindow(Rectangle(window_.size())); > > + inputBufferCount_ = inputCfg.bufferCount; > + outputBufferCount_ = outputCfg.bufferCount; > + > return 0; > } > > @@ -514,34 +517,84 @@ void DebayerEGL::setShaderVariableValues(eGLImage &eglImageIn, const DebayerPara > return; > } > > -int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) > +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) > { > - /* eGL context switch */ > - egl_.makeCurrent(); > + auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get()); > + if (cache_miss) { > + if 
(eglImageBayerIn_.size() > inputBufferCount_) { > + LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted"; eglImageBayerIn_.erase(input_cache); otherwise the stale entry (nullptr) might cause issues later. > + return nullptr; > + } > + input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, > + height_, inputConfig_.stride, GL_TEXTURE0, 0); > + } > + eGLImage *eglImageIn = input_cache->second.get(); > > /* Try to create texture for input buffer via dmabuf import */ > - if (use_dmabuf_) { > - if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) { > + if (use_dmabuf_ && cache_miss) { > + if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) { > use_dmabuf_ = false; > LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload"; > } > } > > - /* Otherwise create texture for input buffer via upload from CPU */ > - if (!use_dmabuf_) { > + if (use_dmabuf_) { > + /* Cache hit using dmabuf activate and bind */ > + if (!cache_miss) > + egl_.activateBindTexture(*eglImageIn); > + } else { > + /* Otherwise create texture for input buffer via upload from CPU */ > inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read); > inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read); > if (!inMapped->value().isValid()) { > LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; > - return -ENODEV; > + return nullptr; > } > - egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data()); > + if (cache_miss) > + egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); > + else > + egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); > } > > - /* Generate the output render framebuffer as render to texture */ > - egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get()); > + return eglImageIn; > +} > + > +eGLImage 
*DebayerEGL::getCachedOutputFrameBuffer(FrameBuffer *output) > +{ > + auto [output_cache, cache_miss] = eglImageBayerOut_.try_emplace(output->planes()[0].fd.get()); > + if (cache_miss) { > + if (eglImageBayerOut_.size() > outputBufferCount_) { > + LOG(Debayer, Error) << "Output buffer count " << outputBufferCount_ << " exhaustion"; eglImageBayerOut_.erase(output_cache); for the same reason as above. > + return nullptr; > + } > + output_cache->second = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, > + outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1); > + egl_.createOutputDMABufTexture2D(*output_cache->second, output->planes()[0].fd.get()); > + } > + eGLImage *eglImageOut = output_cache->second.get(); > + > + return eglImageOut; return output_cache->second.get(); ? > +} > + > +int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) > +{ > + eGLImage *eglImageIn; > + eGLImage *eglImageOut; > + > + /* eGL context switch */ > + egl_.makeCurrent(); > + > + eglImageIn = getCachedInputFrameBuffer(input, inMapped, inDmaSyncer); > + if (!eglImageIn) > + return -ENOMEM; > + > + eglImageOut = getCachedOutputFrameBuffer(output); > + if (!eglImageOut) > + return -ENOMEM; > + > + egl_.attachTextureToFBO(*eglImageOut); > + setShaderVariableValues(*eglImageIn, params); > > - setShaderVariableValues(*eglImageBayerIn_, params); > glViewport(0, 0, width_, height_); > glClear(GL_COLOR_BUFFER_BIT); > glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS); > @@ -623,19 +676,13 @@ int DebayerEGL::start() > if (initBayerShaders(inputPixelFormat_, outputPixelFormat_)) > return -EINVAL; > > - /* Raw bayer input as texture */ > - eglImageBayerIn_ = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, height_, inputConfig_.stride, GL_TEXTURE0, 0); > - > - /* Texture we will render to */ > - eglImageBayerOut_ = 
std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1); > - > return 0; > } > > void DebayerEGL::stop() > { > - eglImageBayerOut_.reset(); > - eglImageBayerIn_.reset(); > + eglImageBayerOut_.clear(); > + eglImageBayerIn_.clear(); > > if (programId_) > glDeleteProgram(programId_); > diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h > index d8509e9f2..ddb3ef378 100644 > --- a/src/libcamera/software_isp/debayer_egl.h > +++ b/src/libcamera/software_isp/debayer_egl.h > @@ -22,6 +22,7 @@ > #include "libcamera/internal/mapped_framebuffer.h" > #include "libcamera/internal/software_isp/benchmark.h" > #include "libcamera/internal/software_isp/swstats_cpu.h" > +#include "libcamera/internal/v4l2_videodevice.h" > > #include <EGL/egl.h> > #include <EGL/eglext.h> > @@ -70,14 +71,19 @@ private: > > bool use_dmabuf_; > > + eGLImage *getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer); > + eGLImage *getCachedOutputFrameBuffer(FrameBuffer *output); > + > /* Shader program identifiers */ > GLuint vertexShaderId_ = 0; > GLuint fragmentShaderId_ = 0; > GLuint programId_ = 0; > > /* Pointer to object representing input texture */ > - std::unique_ptr<eGLImage> eglImageBayerIn_; > - std::unique_ptr<eGLImage> eglImageBayerOut_; > + std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerIn_; > + std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerOut_; I think we should use `SharedFD` here to avoid any fd reuse issues. I have just realized that `V4l2BufferCache` does not use `SharedFD` either. And I think that is kind of an issue. Well, nowadays every user just uses `FrameBufferAllocator` with long-lived `FrameBuffer`s, but the libcamera API does not require anything like that. Maybe this should be discussed more, what the exact semantics should be, etc. 
In any case, this should be an easy change, just needs this as well: diff --git a/include/libcamera/base/shared_fd.h b/include/libcamera/base/shared_fd.h index 61fe11c1d6..cec1f6036c 100644 --- a/include/libcamera/base/shared_fd.h +++ b/include/libcamera/base/shared_fd.h @@ -7,6 +7,7 @@ #pragma once +#include <functional> #include <memory> namespace libcamera { @@ -57,3 +58,11 @@ static inline bool operator!=(const SharedFD &lhs, const SharedFD &rhs) } } /* namespace libcamera */ + +template<> +struct std::hash<libcamera::SharedFD> : private std::hash<int> { + auto operator()(const libcamera::SharedFD &x) const noexcept + { + return std::hash<int>::operator()(x.get()); + } +}; > + unsigned int inputBufferCount_; > + unsigned int outputBufferCount_; Something like `xyzBufferCacheMaxSize_` / `maxXyzBufferCacheSize_` is more to the point in my opinion. > > /* Shader parameters */ > float firstRed_x_; > -- > 2.54.0 >
2026. 06. 26. 15:19 keltezéssel, Barnabás Pőcze írta: > 2026. 06. 26. 13:33 keltezéssel, Bryan O'Donoghue írta: >> Implement a texture caching mechanism for both input and output frames and >> for both types of input frame. >> >> The before/after on a Qualcomm x1e is: >> >> 9.737ms per frame >> 5.691ms per frame >> >> The before/after on a Qualcomm sm8250 is: >> >> 21.710ms per frame >> 17.336ms per frame >> >> for i in {1..20} do >> cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60 >> >> Interestingly there appears to be an absolute ~ 4.x ms per frame uplift as >> opposed to what intuition might suggest a proportional. >> >> Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> >> --- >> src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++----- >> src/libcamera/software_isp/debayer_egl.h | 10 ++- >> 2 files changed, 75 insertions(+), 22 deletions(-) >> >> diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp >> index 53bb67c17..fc37f0b75 100644 >> --- a/src/libcamera/software_isp/debayer_egl.cpp >> +++ b/src/libcamera/software_isp/debayer_egl.cpp >> @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg, > [...] >> +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) >> { >> - /* eGL context switch */ >> - egl_.makeCurrent(); >> + auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get()); >> + if (cache_miss) { >> + if (eglImageBayerIn_.size() > inputBufferCount_) { >> + LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted"; And I am not sure this should be an error, maybe "debug" at most. > > eglImageBayerIn_.erase(input_cache); > > otherwise the stale entry (nullptr) might cause issues later. 
> > >> + return nullptr; >> + } >> + input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, >> + height_, inputConfig_.stride, GL_TEXTURE0, 0); >> + } >> + eGLImage *eglImageIn = input_cache->second.get(); >> >> /* Try to create texture for input buffer via dmabuf import */ >> - if (use_dmabuf_) { >> - if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) { >> + if (use_dmabuf_ && cache_miss) { >> + if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) { >> use_dmabuf_ = false; >> LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload"; >> } >> } >> >> - /* Otherwise create texture for input buffer via upload from CPU */ >> - if (!use_dmabuf_) { >> + if (use_dmabuf_) { >> + /* Cache hit using dmabuf activate and bind */ >> + if (!cache_miss) >> + egl_.activateBindTexture(*eglImageIn); >> + } else { >> + /* Otherwise create texture for input buffer via upload from CPU */ >> inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read); >> inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read); >> if (!inMapped->value().isValid()) { >> LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; >> - return -ENODEV; >> + return nullptr; >> } >> - egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data()); >> + if (cache_miss) >> + egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); >> + else >> + egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); >> } >> >> - /* Generate the output render framebuffer as render to texture */ >> - egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get()); >> + return eglImageIn; >> +} >> [...]
On 26/06/2026 14:19, Barnabás Pőcze wrote: > diff --git a/include/libcamera/base/shared_fd.h b/include/libcamera/ > base/shared_fd.h > index 61fe11c1d6..cec1f6036c 100644 > --- a/include/libcamera/base/shared_fd.h > +++ b/include/libcamera/base/shared_fd.h > @@ -7,6 +7,7 @@ > > #pragma once > > +#include <functional> > #include <memory> > > namespace libcamera { > @@ -57,3 +58,11 @@ static inline bool operator!=(const SharedFD &lhs, > const SharedFD &rhs) > } > > } /* namespace libcamera */ > + > +template<> > +struct std::hash<libcamera::SharedFD> : private std::hash<int> { > + auto operator()(const libcamera::SharedFD &x) const noexcept > + { > + return std::hash<int>::operator()(x.get()); > + } > +}; Do you want to make that into a patch yourself, or should I pick it up for you? --- bod
2026. 06. 26. 16:10 keltezéssel, Bryan O'Donoghue írta: > On 26/06/2026 14:19, Barnabás Pőcze wrote: >> diff --git a/include/libcamera/base/shared_fd.h b/include/libcamera/ base/shared_fd.h >> index 61fe11c1d6..cec1f6036c 100644 >> --- a/include/libcamera/base/shared_fd.h >> +++ b/include/libcamera/base/shared_fd.h >> @@ -7,6 +7,7 @@ >> >> #pragma once >> >> +#include <functional> >> #include <memory> >> >> namespace libcamera { >> @@ -57,3 +58,11 @@ static inline bool operator!=(const SharedFD &lhs, const SharedFD &rhs) >> } >> >> } /* namespace libcamera */ >> + >> +template<> >> +struct std::hash<libcamera::SharedFD> : private std::hash<int> { >> + auto operator()(const libcamera::SharedFD &x) const noexcept >> + { >> + return std::hash<int>::operator()(x.get()); >> + } >> +}; > > Do you want to make that into a patch yourself or for me to pick it up for you ? Please include it if you send a new version with `SharedFD` as the map key. > > --- > bod
diff --git a/src/libcamera/software_isp/debayer_egl.cpp b/src/libcamera/software_isp/debayer_egl.cpp index 53bb67c17..fc37f0b75 100644 --- a/src/libcamera/software_isp/debayer_egl.cpp +++ b/src/libcamera/software_isp/debayer_egl.cpp @@ -355,6 +355,9 @@ int DebayerEGL::configure(const StreamConfiguration &inputCfg, */ stats_->setWindow(Rectangle(window_.size())); + inputBufferCount_ = inputCfg.bufferCount; + outputBufferCount_ = outputCfg.bufferCount; + return 0; } @@ -514,34 +517,84 @@ void DebayerEGL::setShaderVariableValues(eGLImage &eglImageIn, const DebayerPara return; } -int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) +eGLImage *DebayerEGL::getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) { - /* eGL context switch */ - egl_.makeCurrent(); + auto [input_cache, cache_miss] = eglImageBayerIn_.try_emplace(input->planes()[0].fd.get()); + if (cache_miss) { + if (eglImageBayerIn_.size() > inputBufferCount_) { + LOG(Debayer, Error) << "Input count " << inputBufferCount_ << " exhausted"; + return nullptr; + } + input_cache->second = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, + height_, inputConfig_.stride, GL_TEXTURE0, 0); + } + eGLImage *eglImageIn = input_cache->second.get(); /* Try to create texture for input buffer via dmabuf import */ - if (use_dmabuf_) { - if (egl_.createInputDMABufTexture2D(*eglImageBayerIn_, input->planes()[0].fd.get()) != 0) { + if (use_dmabuf_ && cache_miss) { + if (egl_.createInputDMABufTexture2D(*eglImageIn, input->planes()[0].fd.get()) != 0) { use_dmabuf_ = false; LOG(Debayer, Info) << "Importing input buffer with DMABuf import failed, falling back to upload"; } } - /* Otherwise create texture for input buffer via upload from CPU */ - if (!use_dmabuf_) { + if (use_dmabuf_) { + /* Cache hit using dmabuf 
activate and bind */ + if (!cache_miss) + egl_.activateBindTexture(*eglImageIn); + } else { + /* Otherwise create texture for input buffer via upload from CPU */ inDmaSyncer->emplace(input->planes()[0].fd, DmaSyncer::SyncType::Read); inMapped->emplace(input, MappedFrameBuffer::MapFlag::Read); if (!inMapped->value().isValid()) { LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; - return -ENODEV; + return nullptr; } - egl_.createTexture2D(*eglImageBayerIn_, inMapped->value().planes()[0].data()); + if (cache_miss) + egl_.createTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); + else + egl_.updateTexture2D(*eglImageIn, inMapped->value().planes()[0].data()); } - /* Generate the output render framebuffer as render to texture */ - egl_.createOutputDMABufTexture2D(*eglImageBayerOut_, output->planes()[0].fd.get()); + return eglImageIn; +} + +eGLImage *DebayerEGL::getCachedOutputFrameBuffer(FrameBuffer *output) +{ + auto [output_cache, cache_miss] = eglImageBayerOut_.try_emplace(output->planes()[0].fd.get()); + if (cache_miss) { + if (eglImageBayerOut_.size() > outputBufferCount_) { + LOG(Debayer, Error) << "Output buffer count " << outputBufferCount_ << " exhaustion"; + return nullptr; + } + output_cache->second = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, + outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1); + egl_.createOutputDMABufTexture2D(*output_cache->second, output->planes()[0].fd.get()); + } + eGLImage *eglImageOut = output_cache->second.get(); + + return eglImageOut; +} + +int DebayerEGL::debayerGPU(FrameBuffer *input, FrameBuffer *output, const DebayerParams &params, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer) +{ + eGLImage *eglImageIn; + eGLImage *eglImageOut; + + /* eGL context switch */ + egl_.makeCurrent(); + + eglImageIn = getCachedInputFrameBuffer(input, inMapped, inDmaSyncer); + if (!eglImageIn) + return -ENOMEM; + + eglImageOut = getCachedOutputFrameBuffer(output); + if (!eglImageOut) + 
return -ENOMEM; + + egl_.attachTextureToFBO(*eglImageOut); + setShaderVariableValues(*eglImageIn, params); - setShaderVariableValues(*eglImageBayerIn_, params); glViewport(0, 0, width_, height_); glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_FAN, 0, DEBAYER_OPENGL_COORDS); @@ -623,19 +676,13 @@ int DebayerEGL::start() if (initBayerShaders(inputPixelFormat_, outputPixelFormat_)) return -EINVAL; - /* Raw bayer input as texture */ - eglImageBayerIn_ = std::make_unique<eGLImage>(glFormat_, inputConfig_.stride / bytesPerPixel_, height_, inputConfig_.stride, GL_TEXTURE0, 0); - - /* Texture we will render to */ - eglImageBayerOut_ = std::make_unique<eGLImage>(GL_RGBA, outputSize_.width, outputSize_.height, outputConfig_.stride, GL_TEXTURE1, 1); - return 0; } void DebayerEGL::stop() { - eglImageBayerOut_.reset(); - eglImageBayerIn_.reset(); + eglImageBayerOut_.clear(); + eglImageBayerIn_.clear(); if (programId_) glDeleteProgram(programId_); diff --git a/src/libcamera/software_isp/debayer_egl.h b/src/libcamera/software_isp/debayer_egl.h index d8509e9f2..ddb3ef378 100644 --- a/src/libcamera/software_isp/debayer_egl.h +++ b/src/libcamera/software_isp/debayer_egl.h @@ -22,6 +22,7 @@ #include "libcamera/internal/mapped_framebuffer.h" #include "libcamera/internal/software_isp/benchmark.h" #include "libcamera/internal/software_isp/swstats_cpu.h" +#include "libcamera/internal/v4l2_videodevice.h" #include <EGL/egl.h> #include <EGL/eglext.h> @@ -70,14 +71,19 @@ private: bool use_dmabuf_; + eGLImage *getCachedInputFrameBuffer(FrameBuffer *input, std::optional<MappedFrameBuffer> *inMapped, std::optional<DmaSyncer> *inDmaSyncer); + eGLImage *getCachedOutputFrameBuffer(FrameBuffer *output); + /* Shader program identifiers */ GLuint vertexShaderId_ = 0; GLuint fragmentShaderId_ = 0; GLuint programId_ = 0; /* Pointer to object representing input texture */ - std::unique_ptr<eGLImage> eglImageBayerIn_; - std::unique_ptr<eGLImage> eglImageBayerOut_; + std::unordered_map<int, 
std::unique_ptr<eGLImage>> eglImageBayerIn_; + std::unordered_map<int, std::unique_ptr<eGLImage>> eglImageBayerOut_; + unsigned int inputBufferCount_; + unsigned int outputBufferCount_; /* Shader parameters */ float firstRed_x_;
Implement a texture caching mechanism for both input and output frames and for both types of input frame. The before/after on a Qualcomm x1e is: 9.737ms per frame 5.691ms per frame The before/after on a Qualcomm sm8250 is: 21.710ms per frame 17.336ms per frame for i in {1..20} do cam -c /base/soc@0/cci@ac16000/i2c-bus@1/camera@10 -s width=1920,height=1080 --capture=60 Interestingly, there appears to be an absolute uplift of ~4.x ms per frame, as opposed to the proportional uplift that intuition might suggest. Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> --- src/libcamera/software_isp/debayer_egl.cpp | 87 +++++++++++++++++----- src/libcamera/software_isp/debayer_egl.h | 10 ++- 2 files changed, 75 insertions(+), 22 deletions(-)