[libcamera-devel] statistics cleanup was Re: ... libcamera: ipa: Soft IPA: add a Soft IPA implementation
diff mbox series

Message ID ZXjl/SEqU3X6skMC@duo.ucw.cz
State New
Headers show
Series
  • [libcamera-devel] statistics cleanup was Re: ... libcamera: ipa: Soft IPA: add a Soft IPA implementation
Related show

Commit Message

Pavel Machek Dec. 12, 2023, 11 p.m. UTC
Hi!

> ATM the statistics and debayer classes are mostly ready,
> but I need to merge this with Andrey's v2 RFC and
> then produce a clean new series from this
> (assuming people like the approach).

Could I get you to apply this?

My camera is bayer8, so this is compile-tested only, but should give
same performance with shorter and more readable source. (It will also
allow me to add bayer8 statistics with little modifications).

Thank you and best regards,
								Pavel

Comments

Hans de Goede Dec. 13, 2023, 11:03 a.m. UTC | #1
Hi Pavel,

On 12/13/23 00:00, Pavel Machek wrote:
> Hi!
> 
>> ATM the statistics and debayer classes are mostly ready,
>> but I need to merge this with Andrey's v2 RFC and
>> then produce a clean new series from this
>> (assuming people like the approach).
> 
> Could I get you to apply this?

I can give it a try and assuming it does not cause
a performance regression I would be happy to
squash this in.

Which brings the question of crediting the change,
I can add a:

Suggested-by: Pavel Machek <pavel@ucw.cz>

for these bits, or:

Co-authored-by: Pavel Machek <pavel@ucw.cz>

but then I'm going to need a Signed-off-by from you
(just reply here with it).

Either way let me know which way you want to be credited?

The same question (how to credit) you also applies to
the suggested debayering changes ?

BTW did you see that I added a x_shift_ variable
to both the swstats and the debayer_cpu classes?

This allows shifting the input data start one
pixel to the right, changing e.g. RGGB to GRBG
so that we need less debayer functions.

Unfortunately this trick will not work for
10bpp packed formats because there we have:
MSB MSB MSB MSB LSBS in memory so we cannot
just shift things by skipping the first column
because then we end up with:
MSB MSB MSB LSBS MSB instead, which breaks
the debayer / stats functions expectations.

What we can do even with 10bpp packed is swap
line0 / line1 pointers, so that e.g. RGGB becomes
GBRG using the same stats / debayer functions.

Together with the x_shift_ thing this means that
for 8bits and 10 bits unpacked we will only need
1 line0 and line1 debayer function to cover all
4 basic bayer variants.

Regards,

Hans




> diff --git a/src/libcamera/software_isp/swstats_linaro.cpp b/src/libcamera/software_isp/swstats_linaro.cpp
> index 0323f45f..2f203b17 100644
> --- a/src/libcamera/software_isp/swstats_linaro.cpp
> +++ b/src/libcamera/software_isp/swstats_linaro.cpp
> @@ -38,80 +38,55 @@ static const unsigned int RED_Y_MUL = 77;		/* 0.30 * 256 */
>  static const unsigned int GREEN_Y_MUL = 150;		/* 0.59 * 256 */
>  static const unsigned int BLUE_Y_MUL = 29;		/* 0.11 * 256 */
>  
> -/*
> - * These need to be macros because it accesses a whole bunch of local
> - * variables (and copy and pasting this x times is undesirable)
> - */
> -#define SWISP_LINARO_START_LINE_STATS()			\
> -	uint8_t r, g, b;				\
> -	unsigned int y_val;				\
> -							\
> -	unsigned int sumR = 0;				\
> -	unsigned int sumG = 0;				\
> -	unsigned int sumB = 0;				\
> -							\
> -	unsigned int bright_sum = 0;			\
> -	unsigned int too_bright_sum = 0;
> -
> -#define SWISP_LINARO_ACCUMULATE_LINE_STATS()		\
> -	sumR += r;					\
> -	sumG += g;					\
> -	sumB += b;					\
> -							\
> -	y_val = r * RED_Y_MUL;				\
> -	y_val += g * GREEN_Y_MUL;			\
> -	y_val += b * BLUE_Y_MUL;			\
> -	if (y_val > BRIGHT_LVL) ++bright_sum;		\
> -	if (y_val > TOO_BRIGHT_LVL) ++too_bright_sum;
> -
> -#define SWISP_LINARO_FINISH_LINE_STATS()		\
> -	stats_.sumR_ += sumR;				\
> -	stats_.sumG_ += sumG;				\
> -	stats_.sumB_ += sumB;				\
> -							\
> -	bright_sum_ += bright_sum;			\
> -	too_bright_sum_ += too_bright_sum;
> -
> -void SwStatsLinaro::statsBGGR10PLine(const uint8_t *src0, const uint8_t *src1)
> +static void inline statsBayer(const int width, const uint8_t *src0, const uint8_t *src1, bool bggr, unsigned int &bright_sum, unsigned int &too_bright_sum, StatsLinaro &stats)
>  {
> -	const int width_in_bytes = width_ * 5 / 4;
> -	uint8_t g2;
> +	const int width_in_bytes = width * 5 / 4;
> +	uint8_t r, g, b, g2;
> +	unsigned int y_val;
>  
> -	SWISP_LINARO_START_LINE_STATS()
> +	stats.sumR_ = 0;
> +	stats.sumG_ = 0;
> +	stats.sumB_ = 0;
>  
> -	for (int x = 0; x < width_in_bytes; x += 5) {
> -		/* BGGR */
> -		b  = src0[x];
> -		g  = src0[x + 1];
> -		g2 = src1[x];
> -		r  = src1[x + 1];
> +	bright_sum = 0;
> +	too_bright_sum = 0;
>  
> +	for (int x = 0; x < width_in_bytes; x += 5) {
> +		if (bggr) {
> +			/* BGGR */
> +			b  = src0[x];
> +			g  = src0[x + 1];
> +			g2 = src1[x];
> +			r  = src1[x + 1];
> +		} else {
> +			/* GBRG */
> +			g  = src0[x];
> +			b  = src0[x + 1];
> +			r  = src1[x];
> +			g2 = src1[x + 1];
> +		}
>  		g = g + g2 / 2;
>  
> -		SWISP_LINARO_ACCUMULATE_LINE_STATS()
> +		stats.sumR_ += r;
> +		stats.sumG_ += g;
> +		stats.sumB_ += b;
> +
> +		y_val = r * RED_Y_MUL;
> +		y_val += g * GREEN_Y_MUL;
> +		y_val += b * BLUE_Y_MUL;
> +		if (y_val > BRIGHT_LVL) ++bright_sum;
> +		if (y_val > TOO_BRIGHT_LVL) ++too_bright_sum;
>  	}
> -	SWISP_LINARO_FINISH_LINE_STATS()
>  }
>  
> -void SwStatsLinaro::statsGBRG10PLine(const uint8_t *src0, const uint8_t *src1)
> +void SwStatsLinaro::statsBGGR10PLine(const uint8_t *src0, const uint8_t *src1)
>  {
> -	const int width_in_bytes = width_ * 5 / 4;
> -	uint8_t g2;
> -
> -	SWISP_LINARO_START_LINE_STATS()
> -
> -	for (int x = 0; x < width_in_bytes; x += 5) {
> -		/* GBRG */
> -		g  = src0[x];
> -		b  = src0[x + 1];
> -		r  = src1[x];
> -		g2 = src1[x + 1];
> -
> -		g = g + g2 / 2;
> +	statsBayer(width_, src0, src1, true, bright_sum_, too_bright_sum_, stats_);
> +}
>  
> -		SWISP_LINARO_ACCUMULATE_LINE_STATS()
> -	}
> -	SWISP_LINARO_FINISH_LINE_STATS()
> +void SwStatsLinaro::statsGBRG10PLine(const uint8_t *src0, const uint8_t *src1)
> +{
> +	statsBayer(width_, src0, src1, false, bright_sum_, too_bright_sum_, stats_);
>  }
>  
>  void SwStatsLinaro::statsBGGR10PLine0(const uint8_t *src, unsigned int stride)
>
Pavel Machek Dec. 13, 2023, 1:01 p.m. UTC | #2
Hi!

> >> ATM the statistics and debayer classes are mostly ready,
> >> but I need to merge this with Andrey's v2 RFC and
> >> then produce a clean new series from this
> >> (assuming people like the approach).
> > 
> > Could I get you to apply this?
> 
> I can give it a try and assuming it does not cause
> a performance regression I would be happy to
> squash this in.
> 
> Which brings the question of crediting the change,
> I can add a:
> 
> Suggested-by: Pavel Machek <pavel@ucw.cz>

This one will be fine.

> but then I'm going to need a Signed-off-by from you
> (just reply here with it).

Signed-off-by: Pavel Machek <pavel@ucw.cz>

> Either way let me know which way you want to be credited?
> 
> The same question (how to credit) you also applies to
> the suggested debayering changes ?

Suggested-by will be fine... and

Signed-off-by: Pavel Machek <pavel@ucw.cz>

for that as well.

> BTW did you see that I added a x_shift_ variable
> to both the swstats and the debayer_cpu classes?
> 
> This allows shifting the input data start one
> pixel to the right, changing e.g. RGGB to GRBG
> so that we need less debayer functions.
> 
> Unfortunately this trick will not work for
> 10bpp packed formats because there we have:
> MSB MSB MSB MSB LSBS in memory so we cannot
> just shift things by skipping the first column
> because then we end up with:
> MSB MSB MSB LSBS MSB instead, which breaks
> the debayer / stats functions expectations.

Aha, neat trick I did not notice. I'll take a look. At some point, I
guess we'll need better abstractions that separate getting bayer data
(there are at least 8, 10, 10p, 12 variants -- and some people will
want to use the low bits) and actual debayer algorithm (you have
bi-linear, but there's also nearest point, and there are also higher
quality variants).

But, this all can wait.

I believe first step is getting your series merged. Adding 8bpp to
that should be easy after it is in, and makes pinephone useful. (And
likely helps to others).

Then... I guess doing debayer+downscale for preview makes sense. Doing
it in one step should save significant CPU. But then "what to do with
rgb stream for video recording" is a good question. May need better
quality than we have (use full 10 bits, lens shading compensation),
and is still performance sensitive, and maybe clipping to odd
resolution as we currently do will be a problem there.

Best regards,
								Pavel
Hans de Goede Dec. 13, 2023, 8:38 p.m. UTC | #3
Hi,

On 12/13/23 00:00, Pavel Machek wrote:
> Hi!
> 
>> ATM the statistics and debayer classes are mostly ready,
>> but I need to merge this with Andrey's v2 RFC and
>> then produce a clean new series from this
>> (assuming people like the approach).
> 
> Could I get you to apply this?
> 
> My camera is bayer8, so this is compile-tested only, but should give
> same performance with shorter and more readable source. (It will also
> allow me to add bayer8 statistics with little modifications).
> 
> Thank you and best regards,
> 								Pavel

Thank you.

Note your implementation was broken because you were resetting
the per frame stats every line. The idea is to use a local variable
(hopefully a register) to gather the local r + g + b sums and
then at the end of processing the entire line increase
the per frame stats using the local variable.

I have merged / squashed a fixed version with some further cleanups:

From ea1be60a181fad3a782fe970198906593601c3d3 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 13 Dec 2023 00:00:13 +0100
Subject: [PATCH] Pavel swstats changes

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../internal/software_isp/swstats_cpu.h       |   2 -
 src/libcamera/software_isp/swstats_cpu.cpp    | 120 +++++++-----------
 2 files changed, 46 insertions(+), 76 deletions(-)

diff --git a/include/libcamera/internal/software_isp/swstats_cpu.h b/include/libcamera/internal/software_isp/swstats_cpu.h
index 993ade3e..0ceb995f 100644
--- a/include/libcamera/internal/software_isp/swstats_cpu.h
+++ b/include/libcamera/internal/software_isp/swstats_cpu.h
@@ -29,8 +29,6 @@ public:
 	/* FIXME this should be dropped once AWB has moved to the IPA */
 	SwIspStats getStats() { return *sharedStats_; }
 private:
-	void statsBGGR10PLine(const uint8_t *src0, const uint8_t *src1);
-	void statsGBRG10PLine(const uint8_t *src0, const uint8_t *src1);
 	void statsBGGR10PLine0(const uint8_t *src, unsigned int stride);
 	void statsGBRG10PLine0(const uint8_t *src, unsigned int stride);
 	void statsGRBG10PLine0(const uint8_t *src, unsigned int stride);
diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp
index 161dd033..84edadd8 100644
--- a/src/libcamera/software_isp/swstats_cpu.cpp
+++ b/src/libcamera/software_isp/swstats_cpu.cpp
@@ -36,102 +36,74 @@ static const unsigned int RED_Y_MUL = 77;		/* 0.30 * 256 */
 static const unsigned int GREEN_Y_MUL = 150;		/* 0.59 * 256 */
 static const unsigned int BLUE_Y_MUL = 29;		/* 0.11 * 256 */
 
-/*
- * These need to be macros because it accesses a whole bunch of local
- * variables (and copy and pasting this x times is undesirable)
- */
-#define SWISP_LINARO_START_LINE_STATS()			\
-	uint8_t r, g, b;				\
-	unsigned int y_val;				\
-							\
-	unsigned int sumR = 0;				\
-	unsigned int sumG = 0;				\
-	unsigned int sumB = 0;				\
-							\
-	unsigned int bright_sum = 0;			\
+static inline __attribute__((always_inline)) void
+statsBayer10P(const int width, const uint8_t *src0, const uint8_t *src1, bool bggr, unsigned int &bright_sum_, unsigned int &too_bright_sum_, SwIspStats &stats_)
+{
+	const int width_in_bytes = width * 5 / 4;
+	uint8_t r, g, b, g2;
+	unsigned int y_val;
+	unsigned int sumR = 0;
+	unsigned int sumG = 0;
+	unsigned int sumB = 0;
+
+	unsigned int bright_sum = 0;
 	unsigned int too_bright_sum = 0;
 
-#define SWISP_LINARO_ACCUMULATE_LINE_STATS()		\
-	sumR += r;					\
-	sumG += g;					\
-	sumB += b;					\
-							\
-	y_val = r * RED_Y_MUL;				\
-	y_val += g * GREEN_Y_MUL;			\
-	y_val += b * BLUE_Y_MUL;			\
-	if (y_val > BRIGHT_LVL) ++bright_sum;		\
-	if (y_val > TOO_BRIGHT_LVL) ++too_bright_sum;
+	for (int x = 0; x < width_in_bytes; x += 5) {
+		if (bggr) {
+			/* BGGR */
+			b  = src0[x];
+			g  = src0[x + 1];
+			g2 = src1[x];
+			r  = src1[x + 1];
+		} else {
+			/* GBRG */
+			g  = src0[x];
+			b  = src0[x + 1];
+			r  = src1[x];
+			g2 = src1[x + 1];
+		}
+		g = g + g2 / 2;
 
-#define SWISP_LINARO_FINISH_LINE_STATS()		\
-	stats_.sumR_ += sumR;				\
-	stats_.sumG_ += sumG;				\
-	stats_.sumB_ += sumB;				\
-							\
-	bright_sum_ += bright_sum;			\
+		sumR += r;
+		sumG += g;
+		sumB += b;
+
+		y_val = r * RED_Y_MUL;
+		y_val += g * GREEN_Y_MUL;
+		y_val += b * BLUE_Y_MUL;
+		if (y_val > BRIGHT_LVL) ++bright_sum;
+		if (y_val > TOO_BRIGHT_LVL) ++too_bright_sum;
+	}
+
+	stats_.sumR_ += sumR;
+	stats_.sumG_ += sumG;
+	stats_.sumB_ += sumB;
+
+	bright_sum_ += bright_sum;
 	too_bright_sum_ += too_bright_sum;
-
-void SwStatsCpu::statsBGGR10PLine(const uint8_t *src0, const uint8_t *src1)
-{
-	const int width_in_bytes = window_.width * 5 / 4;
-	uint8_t g2;
-
-	SWISP_LINARO_START_LINE_STATS()
-
-	for (int x = 0; x < width_in_bytes; x += 5) {
-		/* BGGR */
-		b  = src0[x];
-		g  = src0[x + 1];
-		g2 = src1[x];
-		r  = src1[x + 1];
-
-		g = g + g2 / 2;
-
-		SWISP_LINARO_ACCUMULATE_LINE_STATS()
-	}
-	SWISP_LINARO_FINISH_LINE_STATS()
-}
-
-void SwStatsCpu::statsGBRG10PLine(const uint8_t *src0, const uint8_t *src1)
-{
-	const int width_in_bytes = window_.width * 5 / 4;
-	uint8_t g2;
-
-	SWISP_LINARO_START_LINE_STATS()
-
-	for (int x = 0; x < width_in_bytes; x += 5) {
-		/* GBRG */
-		g  = src0[x];
-		b  = src0[x + 1];
-		r  = src1[x];
-		g2 = src1[x + 1];
-
-		g = g + g2 / 2;
-
-		SWISP_LINARO_ACCUMULATE_LINE_STATS()
-	}
-	SWISP_LINARO_FINISH_LINE_STATS()
 }
 
 void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src, unsigned int stride)
 {
-	statsBGGR10PLine(src, src + stride);
+	statsBayer10P(window_.width, src, src + stride, true, bright_sum_, too_bright_sum_, stats_);
 }
 
 void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src, unsigned int stride)
 {
-	statsGBRG10PLine(src, src + stride);
+	statsBayer10P(window_.width, src, src + stride, false, bright_sum_, too_bright_sum_, stats_);
 }
 
 void SwStatsCpu::statsGRBG10PLine0(const uint8_t *src, unsigned int stride)
 {
 	/* GRBG is BGGR with the lines swapped */
-	statsBGGR10PLine(src + stride, src);
+	statsBayer10P(window_.width, src + stride, src, true, bright_sum_, too_bright_sum_, stats_);
 }
 
 void SwStatsCpu::statsRGGB10PLine0(const uint8_t *src, unsigned int stride)
 {
 	/* RGGB is GBRG with the lines swapped */
-	statsGBRG10PLine(src + stride, src);
+	statsBayer10P(window_.width, src + stride, src, false, bright_sum_, too_bright_sum_, stats_);
 }
 
 void SwStatsCpu::resetStats(void)
Pavel Machek Dec. 13, 2023, 9:39 p.m. UTC | #4
Hi!

> Note your implementation was broken because you were resetting
> the per frame stats every line. The idea is to use a local variable
> (hopefully a register) to gather the local r + g + b sums and
> then at the end of processing the entire line increase
> the per frame stats using the local variable.

Yes, I seen that optimalization, I just hoped that compiler is able to
this on its own when it has enough registers.

> I have merged / squashed a fixed version with some further cleanups:

Thank you, looks good.

Did we decide on the name? swisp_cpu or something like that was
mentioned, but perhaps we cpuisp is shorter and also should be unique
enough?

Best regards,
							Pavel

Patch
diff mbox series

diff --git a/src/libcamera/software_isp/swstats_linaro.cpp b/src/libcamera/software_isp/swstats_linaro.cpp
index 0323f45f..2f203b17 100644
--- a/src/libcamera/software_isp/swstats_linaro.cpp
+++ b/src/libcamera/software_isp/swstats_linaro.cpp
@@ -38,80 +38,55 @@  static const unsigned int RED_Y_MUL = 77;		/* 0.30 * 256 */
 static const unsigned int GREEN_Y_MUL = 150;		/* 0.59 * 256 */
 static const unsigned int BLUE_Y_MUL = 29;		/* 0.11 * 256 */
 
-/*
- * These need to be macros because it accesses a whole bunch of local
- * variables (and copy and pasting this x times is undesirable)
- */
-#define SWISP_LINARO_START_LINE_STATS()			\
-	uint8_t r, g, b;				\
-	unsigned int y_val;				\
-							\
-	unsigned int sumR = 0;				\
-	unsigned int sumG = 0;				\
-	unsigned int sumB = 0;				\
-							\
-	unsigned int bright_sum = 0;			\
-	unsigned int too_bright_sum = 0;
-
-#define SWISP_LINARO_ACCUMULATE_LINE_STATS()		\
-	sumR += r;					\
-	sumG += g;					\
-	sumB += b;					\
-							\
-	y_val = r * RED_Y_MUL;				\
-	y_val += g * GREEN_Y_MUL;			\
-	y_val += b * BLUE_Y_MUL;			\
-	if (y_val > BRIGHT_LVL) ++bright_sum;		\
-	if (y_val > TOO_BRIGHT_LVL) ++too_bright_sum;
-
-#define SWISP_LINARO_FINISH_LINE_STATS()		\
-	stats_.sumR_ += sumR;				\
-	stats_.sumG_ += sumG;				\
-	stats_.sumB_ += sumB;				\
-							\
-	bright_sum_ += bright_sum;			\
-	too_bright_sum_ += too_bright_sum;
-
-void SwStatsLinaro::statsBGGR10PLine(const uint8_t *src0, const uint8_t *src1)
+static void inline statsBayer(const int width, const uint8_t *src0, const uint8_t *src1, bool bggr, unsigned int &bright_sum, unsigned int &too_bright_sum, StatsLinaro &stats)
 {
-	const int width_in_bytes = width_ * 5 / 4;
-	uint8_t g2;
+	const int width_in_bytes = width * 5 / 4;
+	uint8_t r, g, b, g2;
+	unsigned int y_val;
 
-	SWISP_LINARO_START_LINE_STATS()
+	stats.sumR_ = 0;
+	stats.sumG_ = 0;
+	stats.sumB_ = 0;
 
-	for (int x = 0; x < width_in_bytes; x += 5) {
-		/* BGGR */
-		b  = src0[x];
-		g  = src0[x + 1];
-		g2 = src1[x];
-		r  = src1[x + 1];
+	bright_sum = 0;
+	too_bright_sum = 0;
 
+	for (int x = 0; x < width_in_bytes; x += 5) {
+		if (bggr) {
+			/* BGGR */
+			b  = src0[x];
+			g  = src0[x + 1];
+			g2 = src1[x];
+			r  = src1[x + 1];
+		} else {
+			/* GBRG */
+			g  = src0[x];
+			b  = src0[x + 1];
+			r  = src1[x];
+			g2 = src1[x + 1];
+		}
 		g = g + g2 / 2;
 
-		SWISP_LINARO_ACCUMULATE_LINE_STATS()
+		stats.sumR_ += r;
+		stats.sumG_ += g;
+		stats.sumB_ += b;
+
+		y_val = r * RED_Y_MUL;
+		y_val += g * GREEN_Y_MUL;
+		y_val += b * BLUE_Y_MUL;
+		if (y_val > BRIGHT_LVL) ++bright_sum;
+		if (y_val > TOO_BRIGHT_LVL) ++too_bright_sum;
 	}
-	SWISP_LINARO_FINISH_LINE_STATS()
 }
 
-void SwStatsLinaro::statsGBRG10PLine(const uint8_t *src0, const uint8_t *src1)
+void SwStatsLinaro::statsBGGR10PLine(const uint8_t *src0, const uint8_t *src1)
 {
-	const int width_in_bytes = width_ * 5 / 4;
-	uint8_t g2;
-
-	SWISP_LINARO_START_LINE_STATS()
-
-	for (int x = 0; x < width_in_bytes; x += 5) {
-		/* GBRG */
-		g  = src0[x];
-		b  = src0[x + 1];
-		r  = src1[x];
-		g2 = src1[x + 1];
-
-		g = g + g2 / 2;
+	statsBayer(width_, src0, src1, true, bright_sum_, too_bright_sum_, stats_);
+}
 
-		SWISP_LINARO_ACCUMULATE_LINE_STATS()
-	}
-	SWISP_LINARO_FINISH_LINE_STATS()
+void SwStatsLinaro::statsGBRG10PLine(const uint8_t *src0, const uint8_t *src1)
+{
+	statsBayer(width_, src0, src1, false, bright_sum_, too_bright_sum_, stats_);
 }
 
 void SwStatsLinaro::statsBGGR10PLine0(const uint8_t *src, unsigned int stride)