[1/5] software_isp: swstats_cpu: Move accumulator storage out of the class
diff mbox series

Message ID 20260216190204.106922-2-johannes.goede@oss.qualcomm.com
State New
Headers show
Series
  • software_isp: debayer_cpu: Add multi-threading support
Related show

Commit Message

Hans de Goede Feb. 16, 2026, 7:02 p.m. UTC
Move the storage used to accumulate the RGB sums and the Y histogram
out of the SwStatsCpu class and into the callers.

The idea is to allow a single SwStatsCpu object to be shared between
multiple threads each processing part of the image, with finishFrame()
accumulating the per thread data into the final stats for the entire
frame.

This is a preparation patch for making DebayerCpu support multi-threading
and this could also be used to make processFrame() multi-threaded.

Benchmarking with the GPU-ISP which does separate swstats benchmarking,
on the Uno-Q which has a weak CPU which is good for performance testing,
shows 20-21ms to generate stats for a 3272x2464 frame both before and
after this change.

Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
---
 .../internal/software_isp/swstats_cpu.h       | 29 ++++-----
 src/libcamera/software_isp/debayer_cpu.cpp    | 12 ++--
 src/libcamera/software_isp/debayer_cpu.h      |  1 +
 src/libcamera/software_isp/swstats_cpu.cpp    | 65 +++++++++++++------
 4 files changed, 65 insertions(+), 42 deletions(-)

Patch
diff mbox series

diff --git a/include/libcamera/internal/software_isp/swstats_cpu.h b/include/libcamera/internal/software_isp/swstats_cpu.h
index 64b3e23f..a157afe8 100644
--- a/include/libcamera/internal/software_isp/swstats_cpu.h
+++ b/include/libcamera/internal/software_isp/swstats_cpu.h
@@ -53,11 +53,11 @@  public:
 
 	int configure(const StreamConfiguration &inputCfg);
 	void setWindow(const Rectangle &window);
-	void startFrame(uint32_t frame);
-	void finishFrame(uint32_t frame, uint32_t bufferId);
+	void startFrame(uint32_t frame, struct SwIspStats statsBuffer[], unsigned int statsBufferCount);
+	void finishFrame(uint32_t frame, uint32_t bufferId, struct SwIspStats statsBuffer[], unsigned int statsBufferCount);
 	void processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input);
 
-	void processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])
+	void processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], SwIspStats *stats)
 	{
 		if (frame % kStatPerNumFrames)
 			return;
@@ -66,10 +66,10 @@  public:
 		    y >= (window_.y + window_.height))
 			return;
 
-		(this->*stats0_)(src);
+		(this->*stats0_)(src, stats);
 	}
 
-	void processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])
+	void processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], SwIspStats *stats)
 	{
 		if (frame % kStatPerNumFrames)
 			return;
@@ -78,27 +78,27 @@  public:
 		    y >= (window_.y + window_.height))
 			return;
 
-		(this->*stats2_)(src);
+		(this->*stats2_)(src, stats);
 	}
 
 	Signal<uint32_t, uint32_t> statsReady;
 
 private:
-	using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);
-	using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);
+	using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[], SwIspStats *stats);
+	using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in, SwIspStats *stats);
 
 	int setupStandardBayerOrder(BayerFormat::Order order);
 	/* Bayer 8 bpp unpacked */
-	void statsBGGR8Line0(const uint8_t *src[]);
+	void statsBGGR8Line0(const uint8_t *src[], SwIspStats *stats);
 	/* Bayer 10 bpp unpacked */
-	void statsBGGR10Line0(const uint8_t *src[]);
+	void statsBGGR10Line0(const uint8_t *src[], SwIspStats *stats);
 	/* Bayer 12 bpp unpacked */
-	void statsBGGR12Line0(const uint8_t *src[]);
+	void statsBGGR12Line0(const uint8_t *src[], SwIspStats *stats);
 	/* Bayer 10 bpp packed */
-	void statsBGGR10PLine0(const uint8_t *src[]);
-	void statsGBRG10PLine0(const uint8_t *src[]);
+	void statsBGGR10PLine0(const uint8_t *src[], SwIspStats *stats);
+	void statsGBRG10PLine0(const uint8_t *src[], SwIspStats *stats);
 
-	void processBayerFrame2(MappedFrameBuffer &in);
+	void processBayerFrame2(MappedFrameBuffer &in, SwIspStats *stats);
 
 	processFrameFn processFrame_;
 
@@ -117,7 +117,6 @@  private:
 	unsigned int stride_;
 
 	SharedMemObject<SwIspStats> sharedStats_;
-	SwIspStats stats_;
 	Benchmark bench_;
 };
 
diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
index d0988357..97c1959a 100644
--- a/src/libcamera/software_isp/debayer_cpu.cpp
+++ b/src/libcamera/software_isp/debayer_cpu.cpp
@@ -673,7 +673,7 @@  void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
 	for (unsigned int y = 0; y < yEnd; y += 2) {
 		shiftLinePointers(linePointers, src);
 		memcpyNextLine(linePointers);
-		stats_->processLine0(frame, y, linePointers);
+		stats_->processLine0(frame, y, linePointers, &statsBuffer_);
 		(this->*debayer0_)(dst, linePointers);
 		src += inputConfig_.stride;
 		dst += outputConfig_.stride;
@@ -688,7 +688,7 @@  void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
 	if (window_.y == 0) {
 		shiftLinePointers(linePointers, src);
 		memcpyNextLine(linePointers);
-		stats_->processLine0(frame, yEnd, linePointers);
+		stats_->processLine0(frame, yEnd, linePointers, &statsBuffer_);
 		(this->*debayer0_)(dst, linePointers);
 		src += inputConfig_.stride;
 		dst += outputConfig_.stride;
@@ -724,7 +724,7 @@  void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)
 	for (unsigned int y = 0; y < window_.height; y += 4) {
 		shiftLinePointers(linePointers, src);
 		memcpyNextLine(linePointers);
-		stats_->processLine0(frame, y, linePointers);
+		stats_->processLine0(frame, y, linePointers, &statsBuffer_);
 		(this->*debayer0_)(dst, linePointers);
 		src += inputConfig_.stride;
 		dst += outputConfig_.stride;
@@ -737,7 +737,7 @@  void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)
 
 		shiftLinePointers(linePointers, src);
 		memcpyNextLine(linePointers);
-		stats_->processLine2(frame, y, linePointers);
+		stats_->processLine2(frame, y, linePointers, &statsBuffer_);
 		(this->*debayer2_)(dst, linePointers);
 		src += inputConfig_.stride;
 		dst += outputConfig_.stride;
@@ -866,7 +866,7 @@  void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
 		return;
 	}
 
-	stats_->startFrame(frame);
+	stats_->startFrame(frame, &statsBuffer_, 1);
 
 	if (inputConfig_.patternSize.height == 2)
 		process2(frame, in.planes()[0].data(), out.planes()[0].data());
@@ -885,7 +885,7 @@  void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
 	 *
 	 * \todo Pass real bufferId once stats buffer passing is changed.
 	 */
-	stats_->finishFrame(frame, 0);
+	stats_->finishFrame(frame, 0, &statsBuffer_, 1);
 	outputBufferReady.emit(output);
 	inputBufferReady.emit(input);
 }
diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
index 7a651746..8abf5168 100644
--- a/src/libcamera/software_isp/debayer_cpu.h
+++ b/src/libcamera/software_isp/debayer_cpu.h
@@ -135,6 +135,7 @@  private:
 	LookupTable gammaLut_;
 	bool ccmEnabled_;
 	DebayerParams params_;
+	SwIspStats statsBuffer_;
 
 	debayerFn debayer0_;
 	debayerFn debayer1_;
diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp
index 5c3011a7..23842f6c 100644
--- a/src/libcamera/software_isp/swstats_cpu.cpp
+++ b/src/libcamera/software_isp/swstats_cpu.cpp
@@ -182,14 +182,14 @@  static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */
 	yVal = r * kRedYMul;               \
 	yVal += g * kGreenYMul;            \
 	yVal += b * kBlueYMul;             \
-	stats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
+	stats->yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
 
 #define SWSTATS_FINISH_LINE_STATS() \
-	stats_.sum_.r() += sumR;    \
-	stats_.sum_.g() += sumG;    \
-	stats_.sum_.b() += sumB;
+	stats->sum_.r() += sumR;    \
+	stats->sum_.g() += sumG;    \
+	stats->sum_.b() += sumB;
 
-void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[], SwIspStats *stats)
 {
 	const uint8_t *src0 = src[1] + window_.x;
 	const uint8_t *src1 = src[2] + window_.x;
@@ -214,7 +214,7 @@  void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
 	SWSTATS_FINISH_LINE_STATS()
 }
 
-void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[], SwIspStats *stats)
 {
 	const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
 	const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
@@ -240,7 +240,7 @@  void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
 	SWSTATS_FINISH_LINE_STATS()
 }
 
-void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[], SwIspStats *stats)
 {
 	const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
 	const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
@@ -266,7 +266,7 @@  void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
 	SWSTATS_FINISH_LINE_STATS()
 }
 
-void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[], SwIspStats *stats)
 {
 	const uint8_t *src0 = src[1] + window_.x * 5 / 4;
 	const uint8_t *src1 = src[2] + window_.x * 5 / 4;
@@ -292,7 +292,7 @@  void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
 	SWSTATS_FINISH_LINE_STATS()
 }
 
-void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
+void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[], SwIspStats *stats)
 {
 	const uint8_t *src0 = src[1] + window_.x * 5 / 4;
 	const uint8_t *src1 = src[2] + window_.x * 5 / 4;
@@ -321,10 +321,13 @@  void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
 /**
  * \brief Reset state to start statistics gathering for a new frame
  * \param[in] frame The frame number
+ * \param[in] statsBuffer Array of buffers storing stats
+ * \param[in] statsBufferCount number of buffers in the statsBuffer array
  *
  * This may only be called after a successful setWindow() call.
  */
-void SwStatsCpu::startFrame(uint32_t frame)
+void SwStatsCpu::startFrame(uint32_t frame,
+			    struct SwIspStats statsBuffer[], unsigned int statsBufferCount)
 {
 	if (frame % kStatPerNumFrames)
 		return;
@@ -332,21 +335,39 @@  void SwStatsCpu::startFrame(uint32_t frame)
 	if (window_.width == 0)
 		LOG(SwStatsCpu, Error) << "Calling startFrame() without setWindow()";
 
-	stats_.sum_ = RGB<uint64_t>({ 0, 0, 0 });
-	stats_.yHistogram.fill(0);
+	for (unsigned int i = 0; i < statsBufferCount; i++) {
+		statsBuffer[i].sum_ = RGB<uint64_t>({ 0, 0, 0 });
+		statsBuffer[i].yHistogram.fill(0);
+	}
 }
 
 /**
  * \brief Finish statistics calculation for the current frame
  * \param[in] frame The frame number
  * \param[in] bufferId ID of the statistics buffer
+ * \param[in] statsBuffer Array of buffers storing stats
+ * \param[in] statsBufferCount number of buffers in the statsBuffer array
  *
  * This may only be called after a successful setWindow() call.
  */
-void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)
+void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId,
+			     struct SwIspStats statsBuffer[], unsigned int statsBufferCount)
 {
-	stats_.valid = frame % kStatPerNumFrames == 0;
-	*sharedStats_ = stats_;
+	if (frame % kStatPerNumFrames) {
+		sharedStats_->valid = false;
+		statsReady.emit(frame, bufferId);
+		return;
+	}
+
+	sharedStats_->sum_ = RGB<uint64_t>({ 0, 0, 0 });
+	sharedStats_->yHistogram.fill(0);
+	for (unsigned int i = 0; i < statsBufferCount; i++) {
+		sharedStats_->sum_ += statsBuffer[i].sum_;
+		for (unsigned int j = 0; j < SwIspStats::kYHistogramSize; j++)
+			sharedStats_->yHistogram[j] += statsBuffer[i].yHistogram[j];
+	}
+
+	sharedStats_->valid = true;
 	statsReady.emit(frame, bufferId);
 }
 
@@ -487,7 +508,7 @@  void SwStatsCpu::setWindow(const Rectangle &window)
 	window_.height &= ~(patternSize_.height - 1);
 }
 
-void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
+void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in, SwIspStats *stats)
 {
 	const uint8_t *src = in.planes()[0].data();
 	const uint8_t *linePointers[3];
@@ -504,7 +525,7 @@  void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
 		/* linePointers[0] is not used by any stats0_ functions */
 		linePointers[1] = src;
 		linePointers[2] = src + stride_;
-		(this->*stats0_)(linePointers);
+		(this->*stats0_)(linePointers, stats);
 		src += stride_ * 2;
 	}
 }
@@ -520,12 +541,14 @@  void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
 void SwStatsCpu::processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input)
 {
 	if (frame % kStatPerNumFrames) {
-		finishFrame(frame, bufferId);
+		finishFrame(frame, bufferId, NULL, 0);
 		return;
 	}
 
+	SwIspStats stats;
+
 	bench_.startFrame();
-	startFrame(frame);
+	startFrame(frame, &stats, 1);
 
 	MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
 	if (!in.isValid()) {
@@ -533,8 +556,8 @@  void SwStatsCpu::processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *in
 		return;
 	}
 
-	(this->*processFrame_)(in);
-	finishFrame(frame, bufferId);
+	(this->*processFrame_)(in, &stats);
+	finishFrame(frame, bufferId, &stats, 1);
 	bench_.finishFrame();
 }