@@ -73,6 +73,7 @@ DebayerCpuThread::DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex
debayer_(debayer), threadIndex_(threadIndex),
enableInputMemcpy_(enableInputMemcpy)
{
+ moveToThread(this);
}
/**
@@ -104,8 +105,10 @@ DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats, const GlobalConfigurat
bool enableInputMemcpy =
configuration.option<bool>({ "software_isp", "copy_input_buffer" }).value_or(true);
- /* Just one thread object for now, which will be called inline rather than async */
- threads_.resize(1);
+ unsigned int threadCount =
+ configuration.option<unsigned int>({ "software_isp", "threads" }).value_or(2);
+ threadCount = std::clamp(threadCount, 1u, 8u);
+ threads_.resize(threadCount);
for (unsigned int i = 0; i < threads_.size(); i++)
threads_[i] = std::make_unique<DebayerCpuThread>(this, i, enableInputMemcpy);
@@ -743,6 +746,11 @@ void DebayerCpuThread::process(uint32_t frame, const uint8_t *src, uint8_t *dst)
process2(frame, src, dst);
else
process4(frame, src, dst);
+
+ debayer_->workPendingMutex_.lock();
+ debayer_->workPending_ &= ~(1 << threadIndex_);
+ debayer_->workPendingMutex_.unlock();
+ debayer_->workPendingCv_.notify_one();
}
void DebayerCpuThread::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
@@ -982,7 +990,21 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
stats_->startFrame(frame);
- threads_[0]->process(frame, in.planes()[0].data(), out.planes()[0].data());
+ workPendingMutex_.lock();
+ workPending_ = (1 << threads_.size()) - 1;
+ workPendingMutex_.unlock();
+
+ for (auto &thread : threads_)
+ thread->invokeMethod(&DebayerCpuThread::process,
+ ConnectionTypeQueued, frame,
+ in.planes()[0].data(), out.planes()[0].data());
+
+ {
+ MutexLocker locker(workPendingMutex_);
+ workPendingCv_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(workPendingMutex_) {
+ return workPending_ == 0;
+ });
+ }
metadata.planes()[0].bytesused = out.planes()[0].size();
@@ -1001,6 +1023,23 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
inputBufferReady.emit(input);
}
+/**
+ * \brief Start every debayer worker thread
+ * \return Always 0
+ */
+int DebayerCpu::start()
+{
+	for (const auto &worker : threads_)
+		worker->start();
+
+	return 0;
+}
+
+/**
+ * \brief Stop every debayer worker thread
+ *
+ * exit() is called on all threads before wait() is called on any of them.
+ */
+void DebayerCpu::stop()
+{
+	/* First pass: ask each worker to exit. */
+	for (const auto &worker : threads_)
+		worker->exit();
+
+	/* Second pass: wait for each worker in turn. */
+	for (const auto &worker : threads_)
+		worker->wait();
+}
+
SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)
{
Size patternSize = this->patternSize(inputFormat);
@@ -16,6 +16,7 @@
#include <vector>
#include <libcamera/base/object.h>
+#include <libcamera/base/mutex.h>
#include "libcamera/internal/bayer_format.h"
#include "libcamera/internal/global_configuration.h"
@@ -41,6 +42,8 @@ public:
std::tuple<unsigned int, unsigned int>
strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
/* Debayer one frame from input into output; the worker threads are waited for before returning. */
void process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, const DebayerParams &params);
+ int start();
+ void stop();
SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
@@ -147,6 +150,9 @@ private:
std::unique_ptr<SwStatsCpu> stats_;
unsigned int xShift_; /* Offset of 0/1 applied to window_.x */
+ unsigned int workPending_ LIBCAMERA_TSA_GUARDED_BY(workPendingMutex_);
+ Mutex workPendingMutex_;
+ ConditionVariable workPendingCv_;
std::vector<std::unique_ptr<DebayerCpuThread>>threads_;
};
Add CPU soft ISP multi-threading support.

Benchmark results on the Arduino Uno-Q, whose weak CPU makes it a good
target for performance testing. All numbers are with an IMX219 running at
3280x2464 -> 3272x2464:

1 thread : 147ms / frame, ~6.5 fps
2 threads: 80ms / frame, ~12.5 fps
3 threads: 65ms / frame, ~15 fps

Adding a 4th thread does not improve performance.

Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
---
Changes in v3:
- Adjust for DebayerCpuThread now inheriting from Thread
- Use for (auto &thread : threads_)

Changes in v2:
- Adjust to use the new DebayerCpuThread class introduced in the v2 patch-series
- Re-use threads instead of starting new threads every frame
---
 src/libcamera/software_isp/debayer_cpu.cpp | 45 ++++++++++++++++++++--
 src/libcamera/software_isp/debayer_cpu.h   |  6 +++
 2 files changed, 48 insertions(+), 3 deletions(-)