[6/8] libcamera: swstats_cpu: Add support for YUV420
diff mbox series

Message ID 20241103152205.29219-7-hdegoede@redhat.com
State New
Headers show
Series
  • libcamera: Add swstats_cpu::processFrame() and atomisp pipeline handler
Related show

Commit Message

Hans de Goede Nov. 3, 2024, 3:22 p.m. UTC
Add support for processing YUV420 data.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../internal/software_isp/swstats_cpu.h       |  6 ++
 src/libcamera/software_isp/swstats_cpu.cpp    | 89 +++++++++++++++++++
 2 files changed, 95 insertions(+)

Comments

Milan Zamazal Nov. 4, 2024, 5 p.m. UTC | #1
Hi Hans,

Hans de Goede <hdegoede@redhat.com> writes:

> Add support for processing YUV420 data.
>
> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> ---
>  .../internal/software_isp/swstats_cpu.h       |  6 ++
>  src/libcamera/software_isp/swstats_cpu.cpp    | 89 +++++++++++++++++++
>  2 files changed, 95 insertions(+)
>
> diff --git a/include/libcamera/internal/software_isp/swstats_cpu.h b/include/libcamera/internal/software_isp/swstats_cpu.h
> index fa47cec9..a043861c 100644
> --- a/include/libcamera/internal/software_isp/swstats_cpu.h
> +++ b/include/libcamera/internal/software_isp/swstats_cpu.h
> @@ -71,6 +71,7 @@ public:
>  private:
>  	using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);
>  	using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);
> +	using finishFrameFn = void (SwStatsCpu::*)();
>  
>  	int setupStandardBayerOrder(BayerFormat::Order order);
>  	/* Bayer 8 bpp unpacked */
> @@ -82,10 +83,15 @@ private:
>  	/* Bayer 10 bpp packed */
>  	void statsBGGR10PLine0(const uint8_t *src[]);
>  	void statsGBRG10PLine0(const uint8_t *src[]);
> +	/* YUV420 3 planes */
> +	void statsYUV420Line0(const uint8_t *src[]);
>  
>  	void processBayerFrame2(MappedFrameBuffer &in);
> +	void processYUV420Frame(MappedFrameBuffer &in);
> +	void finishYUV420Frame();
>  
>  	processFrameFn processFrame_;
> +	finishFrameFn finishFrame_;
>  
>  	/* Variables set by configure(), used every line */
>  	statsProcessFn stats0_;
> diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp
> index 1ff15f5b..e81c96a2 100644
> --- a/src/libcamera/software_isp/swstats_cpu.cpp
> +++ b/src/libcamera/software_isp/swstats_cpu.cpp
> @@ -13,6 +13,7 @@
>  
>  #include <libcamera/base/log.h>
>  
> +#include <libcamera/formats.h>
>  #include <libcamera/stream.h>
>  
>  #include "libcamera/internal/bayer_format.h"
> @@ -288,6 +289,40 @@ void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
>  	SWSTATS_FINISH_LINE_STATS()
>  }
>  
> +void SwStatsCpu::statsYUV420Line0(const uint8_t *src[])
> +{
> +	uint64_t sumY = 0;
> +	uint64_t sumU = 0;
> +	uint64_t sumV = 0;
> +	uint8_t y, u, v;
> +
> +	/* Adjust src[] for starting at window_.x */
> +	src[0] += window_.x;
> +	src[1] += window_.x / 2;
> +	src[2] += window_.x / 2;
> +
> +	/* x += 4 sample every other 2x2 block */
> +	for (int x = 0; x < (int)window_.width; x += 4) {

I've probably already asked about using a signed int in line processors
for other patterns, but once again I fail to see why `x' cannot be
unsigned here.

> +		/*
> +		 * Take y from the top left corner of the 2x2 block instead
> +		 * of averaging 4 y-s.

OK.

> +		 */
> +		y = src[0][x];
> +		u = src[1][x];
> +		v = src[2][x];
> +
> +		sumY += y;
> +		sumU += u;
> +		sumV += v;
> +
> +		stats_.yHistogram[y * SwIspStats::kYHistogramSize / 256]++;
> +	}
> +
> +	stats_.sumR_ += sumY;
> +	stats_.sumG_ += sumU;
> +	stats_.sumB_ += sumV;

This is above my confusion acceptance threshold :-), data shouldn't
contain something completely different than what the name suggests.  How
about changing stats_.sum* to stats_.sum[3]?  And defining constants for
r/g/b and y/u/v array index access to make clear what we work with at
the given places?

> +}
> +
>  /**
>   * \brief Reset state to start statistics gathering for a new frame
>   *
> @@ -313,6 +348,9 @@ void SwStatsCpu::startFrame(void)
>   */
>  void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)
>  {
> +	if (finishFrame_)
> +		(this->*finishFrame_)();
> +
>  	*sharedStats_ = stats_;
>  	statsReady.emit(frame, bufferId);
>  }
> @@ -362,6 +400,20 @@ int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order)
>  int SwStatsCpu::configure(const StreamConfiguration &inputCfg)
>  {
>  	stride_ = inputCfg.stride;
> +	finishFrame_ = NULL;

nullptr

> +
> +	if (inputCfg.pixelFormat == formats::YUV420) {
> +		patternSize_.height = 2;
> +		patternSize_.width = 2;
> +		/* Skip every 3th and 4th line, sample every other 2x2 block */
> +		ySkipMask_ = 0x02;
> +		xShift_ = 0;
> +		swapLines_ = false;
> +		stats0_ = &SwStatsCpu::statsYUV420Line0;
> +		processFrame_ = &SwStatsCpu::processYUV420Frame;
> +		finishFrame_ = &SwStatsCpu::finishYUV420Frame;
> +		return 0;
> +	}
>  
>  	BayerFormat bayerFormat =
>  		BayerFormat::fromPixelFormat(inputCfg.pixelFormat);
> @@ -430,6 +482,43 @@ void SwStatsCpu::setWindow(const Rectangle &window)
>  	window_.height &= ~(patternSize_.height - 1);
>  }
>  
> +void SwStatsCpu::processYUV420Frame(MappedFrameBuffer &in)
> +{
> +	const uint8_t *linePointers[3];
> +
> +	linePointers[0] = in.planes()[0].data();
> +	linePointers[1] = in.planes()[1].data();
> +	linePointers[2] = in.planes()[2].data();
> +
> +	/* Adjust linePointers for starting at window_.y */
> +	linePointers[0] += window_.y * stride_;
> +	linePointers[1] += window_.y * stride_ / 4;
> +	linePointers[2] += window_.y * stride_ / 4;
> +
> +	for (unsigned int y = 0; y < window_.height; y += 2) {
> +		if (!(y & ySkipMask_))
> +			(this->*stats0_)(linePointers);
> +
> +		linePointers[0] += stride_ * 2;
> +		linePointers[1] += stride_ / 2;
> +		linePointers[2] += stride_ / 2;
> +	}

4:2:0, 2x2 blocks, skip mask, ..., headache :-).  OK, the stride_
multipliers & dividers look correct.

> +}
> +
> +void SwStatsCpu::finishYUV420Frame()
> +{
> +	/* sumR_ / G_ / B_ contain Y / U / V sums convert this */
> +	double divider = (uint64_t)window_.width * window_.height * 256 / 16;

Why 256 / 16?  To convert to the 0..1 range and to adjust for sampling
only every 16th pixel (one out of 4 * every other horizontally * every
other vertically)?  It should have an explanation here.

> +	double Y = (double)stats_.sumR_ / divider;
> +	/* U and V 0 - 255 values represent -128 - 127 range */

By shifting?  I.e. 0 represents -128 and 128 represents 0?

> +	double U = (double)stats_.sumG_ / divider - 0.5;
> +	double V = (double)stats_.sumB_ / divider - 0.5;
> +
> +	stats_.sumR_ = (Y + 1.140 * V) * divider;
> +	stats_.sumG_ = (Y - 0.395 * U - 0.581 * V) * divider;
> +	stats_.sumB_ = (Y + 2.032 * U) * divider;
> +}
> +
>  void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
>  {
>  	const uint8_t *src = in.planes()[0].data();

Patch
diff mbox series

diff --git a/include/libcamera/internal/software_isp/swstats_cpu.h b/include/libcamera/internal/software_isp/swstats_cpu.h
index fa47cec9..a043861c 100644
--- a/include/libcamera/internal/software_isp/swstats_cpu.h
+++ b/include/libcamera/internal/software_isp/swstats_cpu.h
@@ -71,6 +71,7 @@  public:
 private:
 	using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);
 	using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);
+	using finishFrameFn = void (SwStatsCpu::*)();
 
 	int setupStandardBayerOrder(BayerFormat::Order order);
 	/* Bayer 8 bpp unpacked */
@@ -82,10 +83,15 @@  private:
 	/* Bayer 10 bpp packed */
 	void statsBGGR10PLine0(const uint8_t *src[]);
 	void statsGBRG10PLine0(const uint8_t *src[]);
+	/* YUV420 3 planes */
+	void statsYUV420Line0(const uint8_t *src[]);
 
 	void processBayerFrame2(MappedFrameBuffer &in);
+	void processYUV420Frame(MappedFrameBuffer &in);
+	void finishYUV420Frame();
 
 	processFrameFn processFrame_;
+	finishFrameFn finishFrame_;
 
 	/* Variables set by configure(), used every line */
 	statsProcessFn stats0_;
diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp
index 1ff15f5b..e81c96a2 100644
--- a/src/libcamera/software_isp/swstats_cpu.cpp
+++ b/src/libcamera/software_isp/swstats_cpu.cpp
@@ -13,6 +13,7 @@ 
 
 #include <libcamera/base/log.h>
 
+#include <libcamera/formats.h>
 #include <libcamera/stream.h>
 
 #include "libcamera/internal/bayer_format.h"
@@ -288,6 +289,40 @@  void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
 	SWSTATS_FINISH_LINE_STATS()
 }
 
+void SwStatsCpu::statsYUV420Line0(const uint8_t *src[])
+{
+	uint64_t sumY = 0;
+	uint64_t sumU = 0;
+	uint64_t sumV = 0;
+	uint8_t y, u, v;
+
+	/* Adjust src[] for starting at window_.x */
+	src[0] += window_.x;
+	src[1] += window_.x / 2;
+	src[2] += window_.x / 2;
+
+	/* x += 4 sample every other 2x2 block */
+	for (int x = 0; x < (int)window_.width; x += 4) {
+		/*
+		 * Take y from the top left corner of the 2x2 block instead
+		 * of averaging 4 y-s.
+		 */
+		y = src[0][x];
+		u = src[1][x];
+		v = src[2][x];
+
+		sumY += y;
+		sumU += u;
+		sumV += v;
+
+		stats_.yHistogram[y * SwIspStats::kYHistogramSize / 256]++;
+	}
+
+	stats_.sumR_ += sumY;
+	stats_.sumG_ += sumU;
+	stats_.sumB_ += sumV;
+}
+
 /**
  * \brief Reset state to start statistics gathering for a new frame
  *
@@ -313,6 +348,9 @@  void SwStatsCpu::startFrame(void)
  */
 void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)
 {
+	if (finishFrame_)
+		(this->*finishFrame_)();
+
 	*sharedStats_ = stats_;
 	statsReady.emit(frame, bufferId);
 }
@@ -362,6 +400,20 @@  int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order)
 int SwStatsCpu::configure(const StreamConfiguration &inputCfg)
 {
 	stride_ = inputCfg.stride;
+	finishFrame_ = NULL;
+
+	if (inputCfg.pixelFormat == formats::YUV420) {
+		patternSize_.height = 2;
+		patternSize_.width = 2;
+		/* Skip every 3th and 4th line, sample every other 2x2 block */
+		ySkipMask_ = 0x02;
+		xShift_ = 0;
+		swapLines_ = false;
+		stats0_ = &SwStatsCpu::statsYUV420Line0;
+		processFrame_ = &SwStatsCpu::processYUV420Frame;
+		finishFrame_ = &SwStatsCpu::finishYUV420Frame;
+		return 0;
+	}
 
 	BayerFormat bayerFormat =
 		BayerFormat::fromPixelFormat(inputCfg.pixelFormat);
@@ -430,6 +482,43 @@  void SwStatsCpu::setWindow(const Rectangle &window)
 	window_.height &= ~(patternSize_.height - 1);
 }
 
+void SwStatsCpu::processYUV420Frame(MappedFrameBuffer &in)
+{
+	const uint8_t *linePointers[3];
+
+	linePointers[0] = in.planes()[0].data();
+	linePointers[1] = in.planes()[1].data();
+	linePointers[2] = in.planes()[2].data();
+
+	/* Adjust linePointers for starting at window_.y */
+	linePointers[0] += window_.y * stride_;
+	linePointers[1] += window_.y * stride_ / 4;
+	linePointers[2] += window_.y * stride_ / 4;
+
+	for (unsigned int y = 0; y < window_.height; y += 2) {
+		if (!(y & ySkipMask_))
+			(this->*stats0_)(linePointers);
+
+		linePointers[0] += stride_ * 2;
+		linePointers[1] += stride_ / 2;
+		linePointers[2] += stride_ / 2;
+	}
+}
+
+void SwStatsCpu::finishYUV420Frame()
+{
+	/* sumR_ / G_ / B_ contain Y / U / V sums convert this */
+	double divider = (uint64_t)window_.width * window_.height * 256 / 16;
+	double Y = (double)stats_.sumR_ / divider;
+	/* U and V 0 - 255 values represent -128 - 127 range */
+	double U = (double)stats_.sumG_ / divider - 0.5;
+	double V = (double)stats_.sumB_ / divider - 0.5;
+
+	stats_.sumR_ = (Y + 1.140 * V) * divider;
+	stats_.sumG_ = (Y - 0.395 * U - 0.581 * V) * divider;
+	stats_.sumB_ = (Y + 2.032 * U) * divider;
+}
+
 void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
 {
 	const uint8_t *src = in.planes()[0].data();