[v3] libcamera: debayer_cpu: Add 32bits/aligned output formats
diff mbox series

Message ID 20240618063227.197989-1-robert.mader@collabora.com
State Accepted
Commit 437e601653e69c82f5396979d99e7b9b5bb6086b
Headers show
Series
  • [v3] libcamera: debayer_cpu: Add 32bits/aligned output formats
Related show

Commit Message

Robert Mader June 18, 2024, 6:31 a.m. UTC
In order to be more compatible with modern hardware and APIs. This
notably allows GL implementations to directly import the buffers more
often and seems to be required for Wayland.

Further more, as we already enforce a 8 byte stride, these formats work
better for clients that don't support padding - such as libwebrtc at the
time of writing.

Tested devices:
 - Librem5
 - PinePhone
 - Thinkpad X13s

Signed-off-by: Robert Mader <robert.mader@collabora.com>
Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>

---

Changes in v3:
 - Remove previously introduced variable again and use C++ templates
   instead in order to avoid runtime costs
 - Small cleanups and linting fixes

Changes in v2:
 - Reduce code duplication by using a runtime variable instead
 - Small cleanups
---
 src/libcamera/software_isp/debayer_cpu.cpp | 75 +++++++++++++++++-----
 src/libcamera/software_isp/debayer_cpu.h   | 10 +++
 2 files changed, 69 insertions(+), 16 deletions(-)

Comments

Milan Zamazal June 18, 2024, 6:15 p.m. UTC | #1
Robert Mader <robert.mader@collabora.com> writes:

> In order to be more compatible with modern hardware and APIs. This
> notably allows GL implementations to directly import the buffers more
> often and seems to be required for Wayland.
>
> Further more, as we already enforce a 8 byte stride, these formats work
> better for clients that don't support padding - such as libwebrtc at the
> time of writing.
>
> Tested devices:
>  - Librem5
>  - PinePhone
>  - Thinkpad X13s
>
> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
>
> ---
>
> Changes in v3:
>  - Remove previously introduced variable again and use C++ templates
>    instead in order to avoid runtime costs
>  - Small cleanups and linting fixes
>
> Changes in v2:
>  - Reduce code duplication by using a runtime variable instead
>  - Small cleanups

Hi Robert,

this looks much better, thank you.

Reviewed-by: Milan Zamazal <mzamazal@redhat.com>

> ---
>  src/libcamera/software_isp/debayer_cpu.cpp | 75 +++++++++++++++++-----
>  src/libcamera/software_isp/debayer_cpu.h   | 10 +++
>  2 files changed, 69 insertions(+), 16 deletions(-)
>
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> index c038eed4..f8d2677d 100644
> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -74,6 +74,8 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[curr[x] / (div)];                                                      \
>  	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
>  	*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> +	if constexpr (addAlphaByte)                                                           \
> +		*dst++ = 255;                                                                 \
>  	x++;
>  
>  /*
> @@ -85,6 +87,8 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
>  	*dst++ = green_[curr[x] / (div)];                         \
>  	*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> +	if constexpr (addAlphaByte)                               \
> +		*dst++ = 255;                                     \
>  	x++;
>  
>  /*
> @@ -96,6 +100,8 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>  	*dst++ = green_[curr[x] / (div)];                          \
>  	*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
> +	if constexpr (addAlphaByte)                                \
> +		*dst++ = 255;                                      \
>  	x++;
>  
>  /*
> @@ -107,8 +113,11 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>  	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
>  	*dst++ = red_[curr[x] / (div)];                                                        \
> +	if constexpr (addAlphaByte)                                                            \
> +		*dst++ = 255;                                                                  \
>  	x++;
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint8_t)
> @@ -119,6 +128,7 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint8_t)
> @@ -129,6 +139,7 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -140,6 +151,7 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -151,6 +163,7 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -162,6 +175,7 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -173,6 +187,7 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -198,6 +213,7 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -218,6 +234,7 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -238,6 +255,7 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -280,7 +298,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>  		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
>  		config.patternSize.width = 2;
>  		config.patternSize.height = 2;
> -		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> +		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> +								  formats::XRGB8888,
> +								  formats::ARGB8888,
> +								  formats::BGR888,
> +								  formats::XBGR8888,
> +								  formats::ABGR8888 });
>  		return 0;
>  	}
>  
> @@ -290,7 +313,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>  		config.bpp = 10;
>  		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
>  		config.patternSize.height = 2;
> -		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> +		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> +								  formats::XRGB8888,
> +								  formats::ARGB8888,
> +								  formats::BGR888,
> +								  formats::XBGR8888,
> +								  formats::ABGR8888 });
>  		return 0;
>  	}
>  
> @@ -306,6 +334,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
>  		return 0;
>  	}
>  
> +	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
> +	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
> +		config.bpp = 32;
> +		return 0;
> +	}
> +
>  	LOG(Debayer, Info)
>  		<< "Unsupported output format " << outputFormat.toString();
>  	return -EINVAL;
> @@ -341,6 +375,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  {
>  	BayerFormat bayerFormat =
>  		BayerFormat::fromPixelFormat(inputFormat);
> +	bool addAlphaByte = false;
>  
>  	xShift_ = 0;
>  	swapRedBlueGains_ = false;
> @@ -351,8 +386,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  	};
>  
>  	switch (outputFormat) {
> +	case formats::XRGB8888:
> +	case formats::ARGB8888:
> +		addAlphaByte = true;
> +		[[fallthrough]];
>  	case formats::RGB888:
>  		break;
> +	case formats::XBGR8888:
> +	case formats::ABGR8888:
> +		addAlphaByte = true;
> +		[[fallthrough]];
>  	case formats::BGR888:
>  		/* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
>  		swapRedBlueGains_ = true;
> @@ -383,16 +426,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  	    isStandardBayerOrder(bayerFormat.order)) {
>  		switch (bayerFormat.bitDepth) {
>  		case 8:
> -			debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_BGR888<true> : &DebayerCpu::debayer8_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_BGR888<true> : &DebayerCpu::debayer8_GRGR_BGR888<false>;
>  			break;
>  		case 10:
> -			debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_BGR888<true> : &DebayerCpu::debayer10_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_BGR888<true> : &DebayerCpu::debayer10_GRGR_BGR888<false>;
>  			break;
>  		case 12:
> -			debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_BGR888<true> : &DebayerCpu::debayer12_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_BGR888<true> : &DebayerCpu::debayer12_GRGR_BGR888<false>;
>  			break;
>  		}
>  		setupStandardBayerOrder(bayerFormat.order);
> @@ -403,20 +446,20 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  	    bayerFormat.packing == BayerFormat::Packing::CSI2) {
>  		switch (bayerFormat.order) {
>  		case BayerFormat::BGGR:
> -			debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
>  			return 0;
>  		case BayerFormat::GBRG:
> -			debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
>  			return 0;
>  		case BayerFormat::GRBG:
> -			debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
>  			return 0;
>  		case BayerFormat::RGGB:
> -			debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
>  			return 0;
>  		default:
>  			break;
> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
> index be7dcdca..1dac6435 100644
> --- a/src/libcamera/software_isp/debayer_cpu.h
> +++ b/src/libcamera/software_isp/debayer_cpu.h
> @@ -85,18 +85,28 @@ private:
>  	using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
>  
>  	/* 8-bit raw bayer format */
> +	template<bool addAlphaByte>
>  	void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>  	/* unpacked 10-bit raw bayer format */
> +	template<bool addAlphaByte>
>  	void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>  	/* unpacked 12-bit raw bayer format */
> +	template<bool addAlphaByte>
>  	void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>  	/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
> +	template<bool addAlphaByte>
>  	void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
>  
>  	struct DebayerInputConfig {
Kieran Bingham June 19, 2024, 9:57 a.m. UTC | #2
Quoting Robert Mader (2024-06-18 07:31:59)
> In order to be more compatible with modern hardware and APIs. This
> notably allows GL implementations to directly import the buffers more
> often and seems to be required for Wayland.
> 
> Further more, as we already enforce a 8 byte stride, these formats work
> better for clients that don't support padding - such as libwebrtc at the
> time of writing.
> 
> Tested devices:
>  - Librem5
>  - PinePhone
>  - Thinkpad X13s
> 
> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>

Re-tested again on the x13s, and firefox-nightly.

Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>


> 
> ---
> 
> Changes in v3:
>  - Remove previously introduced variable again and use C++ templates
>    instead in order to avoid runtime costs
>  - Small cleanups and linting fixes
> 
> Changes in v2:
>  - Reduce code duplication by using a runtime variable instead
>  - Small cleanups
> ---
>  src/libcamera/software_isp/debayer_cpu.cpp | 75 +++++++++++++++++-----
>  src/libcamera/software_isp/debayer_cpu.h   | 10 +++
>  2 files changed, 69 insertions(+), 16 deletions(-)
> 
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> index c038eed4..f8d2677d 100644
> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -74,6 +74,8 @@ DebayerCpu::~DebayerCpu()
>         *dst++ = blue_[curr[x] / (div)];                                                      \
>         *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
>         *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> +       if constexpr (addAlphaByte)                                                           \
> +               *dst++ = 255;                                                                 \
>         x++;
>  
>  /*
> @@ -85,6 +87,8 @@ DebayerCpu::~DebayerCpu()
>         *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
>         *dst++ = green_[curr[x] / (div)];                         \
>         *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> +       if constexpr (addAlphaByte)                               \
> +               *dst++ = 255;                                     \
>         x++;
>  
>  /*
> @@ -96,6 +100,8 @@ DebayerCpu::~DebayerCpu()
>         *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>         *dst++ = green_[curr[x] / (div)];                          \
>         *dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
> +       if constexpr (addAlphaByte)                                \
> +               *dst++ = 255;                                      \
>         x++;
>  
>  /*
> @@ -107,8 +113,11 @@ DebayerCpu::~DebayerCpu()
>         *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>         *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
>         *dst++ = red_[curr[x] / (div)];                                                        \
> +       if constexpr (addAlphaByte)                                                            \
> +               *dst++ = 255;                                                                  \
>         x++;
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         DECLARE_SRC_POINTERS(uint8_t)
> @@ -119,6 +128,7 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         DECLARE_SRC_POINTERS(uint8_t)
> @@ -129,6 +139,7 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         DECLARE_SRC_POINTERS(uint16_t)
> @@ -140,6 +151,7 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         DECLARE_SRC_POINTERS(uint16_t)
> @@ -151,6 +163,7 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         DECLARE_SRC_POINTERS(uint16_t)
> @@ -162,6 +175,7 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         DECLARE_SRC_POINTERS(uint16_t)
> @@ -173,6 +187,7 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         const int widthInBytes = window_.width * 5 / 4;
> @@ -198,6 +213,7 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         const int widthInBytes = window_.width * 5 / 4;
> @@ -218,6 +234,7 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         const int widthInBytes = window_.width * 5 / 4;
> @@ -238,6 +255,7 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>         }
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>         const int widthInBytes = window_.width * 5 / 4;
> @@ -280,7 +298,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>                 config.bpp = (bayerFormat.bitDepth + 7) & ~7;
>                 config.patternSize.width = 2;
>                 config.patternSize.height = 2;
> -               config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> +               config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> +                                                                 formats::XRGB8888,
> +                                                                 formats::ARGB8888,
> +                                                                 formats::BGR888,
> +                                                                 formats::XBGR8888,
> +                                                                 formats::ABGR8888 });
>                 return 0;
>         }
>  
> @@ -290,7 +313,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>                 config.bpp = 10;
>                 config.patternSize.width = 4; /* 5 bytes per *4* pixels */
>                 config.patternSize.height = 2;
> -               config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> +               config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> +                                                                 formats::XRGB8888,
> +                                                                 formats::ARGB8888,
> +                                                                 formats::BGR888,
> +                                                                 formats::XBGR8888,
> +                                                                 formats::ABGR8888 });
>                 return 0;
>         }
>  
> @@ -306,6 +334,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
>                 return 0;
>         }
>  
> +       if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
> +           outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
> +               config.bpp = 32;
> +               return 0;
> +       }
> +
>         LOG(Debayer, Info)
>                 << "Unsupported output format " << outputFormat.toString();
>         return -EINVAL;
> @@ -341,6 +375,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  {
>         BayerFormat bayerFormat =
>                 BayerFormat::fromPixelFormat(inputFormat);
> +       bool addAlphaByte = false;
>  
>         xShift_ = 0;
>         swapRedBlueGains_ = false;
> @@ -351,8 +386,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>         };
>  
>         switch (outputFormat) {
> +       case formats::XRGB8888:
> +       case formats::ARGB8888:
> +               addAlphaByte = true;
> +               [[fallthrough]];
>         case formats::RGB888:
>                 break;
> +       case formats::XBGR8888:
> +       case formats::ABGR8888:
> +               addAlphaByte = true;
> +               [[fallthrough]];
>         case formats::BGR888:
>                 /* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
>                 swapRedBlueGains_ = true;
> @@ -383,16 +426,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>             isStandardBayerOrder(bayerFormat.order)) {
>                 switch (bayerFormat.bitDepth) {
>                 case 8:
> -                       debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_BGR888<true> : &DebayerCpu::debayer8_BGBG_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_BGR888<true> : &DebayerCpu::debayer8_GRGR_BGR888<false>;
>                         break;
>                 case 10:
> -                       debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_BGR888<true> : &DebayerCpu::debayer10_BGBG_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_BGR888<true> : &DebayerCpu::debayer10_GRGR_BGR888<false>;
>                         break;
>                 case 12:
> -                       debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_BGR888<true> : &DebayerCpu::debayer12_BGBG_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_BGR888<true> : &DebayerCpu::debayer12_GRGR_BGR888<false>;
>                         break;
>                 }
>                 setupStandardBayerOrder(bayerFormat.order);
> @@ -403,20 +446,20 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>             bayerFormat.packing == BayerFormat::Packing::CSI2) {
>                 switch (bayerFormat.order) {
>                 case BayerFormat::BGGR:
> -                       debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
>                         return 0;
>                 case BayerFormat::GBRG:
> -                       debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
>                         return 0;
>                 case BayerFormat::GRBG:
> -                       debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
>                         return 0;
>                 case BayerFormat::RGGB:
> -                       debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> -                       debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> +                       debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
> +                       debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
>                         return 0;
>                 default:
>                         break;
> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
> index be7dcdca..1dac6435 100644
> --- a/src/libcamera/software_isp/debayer_cpu.h
> +++ b/src/libcamera/software_isp/debayer_cpu.h
> @@ -85,18 +85,28 @@ private:
>         using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
>  
>         /* 8-bit raw bayer format */
> +       template<bool addAlphaByte>
>         void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +       template<bool addAlphaByte>
>         void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>         /* unpacked 10-bit raw bayer format */
> +       template<bool addAlphaByte>
>         void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +       template<bool addAlphaByte>
>         void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>         /* unpacked 12-bit raw bayer format */
> +       template<bool addAlphaByte>
>         void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +       template<bool addAlphaByte>
>         void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>         /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
> +       template<bool addAlphaByte>
>         void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +       template<bool addAlphaByte>
>         void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> +       template<bool addAlphaByte>
>         void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
> +       template<bool addAlphaByte>
>         void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
>  
>         struct DebayerInputConfig {
> -- 
> 2.45.2
>
Hans de Goede June 24, 2024, 9:37 a.m. UTC | #3
Hi,

On 6/18/24 8:31 AM, Robert Mader wrote:
> In order to be more compatible with modern hardware and APIs. This
> notably allows GL implementations to directly import the buffers more
> often and seems to be required for Wayland.
> 
> Further more, as we already enforce a 8 byte stride, these formats work
> better for clients that don't support padding - such as libwebrtc at the
> time of writing.
> 
> Tested devices:
>  - Librem5
>  - PinePhone
>  - Thinkpad X13s
> 
> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>

Thanks, patch looks good to me:

Reviewed-by: Hans de Goede <hdegoede@redhat.com>

Regards,

Hans




> 
> ---
> 
> Changes in v3:
>  - Remove previously introduced variable again and use C++ templates
>    instead in order to avoid runtime costs
>  - Small cleanups and linting fixes
> 
> Changes in v2:
>  - Reduce code duplication by using a runtime variable instead
>  - Small cleanups
> ---
>  src/libcamera/software_isp/debayer_cpu.cpp | 75 +++++++++++++++++-----
>  src/libcamera/software_isp/debayer_cpu.h   | 10 +++
>  2 files changed, 69 insertions(+), 16 deletions(-)
> 
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> index c038eed4..f8d2677d 100644
> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -74,6 +74,8 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[curr[x] / (div)];                                                      \
>  	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
>  	*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> +	if constexpr (addAlphaByte)                                                           \
> +		*dst++ = 255;                                                                 \
>  	x++;
>  
>  /*
> @@ -85,6 +87,8 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
>  	*dst++ = green_[curr[x] / (div)];                         \
>  	*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> +	if constexpr (addAlphaByte)                               \
> +		*dst++ = 255;                                     \
>  	x++;
>  
>  /*
> @@ -96,6 +100,8 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>  	*dst++ = green_[curr[x] / (div)];                          \
>  	*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
> +	if constexpr (addAlphaByte)                                \
> +		*dst++ = 255;                                      \
>  	x++;
>  
>  /*
> @@ -107,8 +113,11 @@ DebayerCpu::~DebayerCpu()
>  	*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>  	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
>  	*dst++ = red_[curr[x] / (div)];                                                        \
> +	if constexpr (addAlphaByte)                                                            \
> +		*dst++ = 255;                                                                  \
>  	x++;
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint8_t)
> @@ -119,6 +128,7 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint8_t)
> @@ -129,6 +139,7 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -140,6 +151,7 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -151,6 +163,7 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -162,6 +175,7 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	DECLARE_SRC_POINTERS(uint16_t)
> @@ -173,6 +187,7 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -198,6 +213,7 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -218,6 +234,7 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -238,6 +255,7 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>  	}
>  }
>  
> +template<bool addAlphaByte>
>  void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
>  {
>  	const int widthInBytes = window_.width * 5 / 4;
> @@ -280,7 +298,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>  		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
>  		config.patternSize.width = 2;
>  		config.patternSize.height = 2;
> -		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> +		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> +								  formats::XRGB8888,
> +								  formats::ARGB8888,
> +								  formats::BGR888,
> +								  formats::XBGR8888,
> +								  formats::ABGR8888 });
>  		return 0;
>  	}
>  
> @@ -290,7 +313,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>  		config.bpp = 10;
>  		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
>  		config.patternSize.height = 2;
> -		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> +		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> +								  formats::XRGB8888,
> +								  formats::ARGB8888,
> +								  formats::BGR888,
> +								  formats::XBGR8888,
> +								  formats::ABGR8888 });
>  		return 0;
>  	}
>  
> @@ -306,6 +334,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
>  		return 0;
>  	}
>  
> +	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
> +	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
> +		config.bpp = 32;
> +		return 0;
> +	}
> +
>  	LOG(Debayer, Info)
>  		<< "Unsupported output format " << outputFormat.toString();
>  	return -EINVAL;
> @@ -341,6 +375,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  {
>  	BayerFormat bayerFormat =
>  		BayerFormat::fromPixelFormat(inputFormat);
> +	bool addAlphaByte = false;
>  
>  	xShift_ = 0;
>  	swapRedBlueGains_ = false;
> @@ -351,8 +386,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  	};
>  
>  	switch (outputFormat) {
> +	case formats::XRGB8888:
> +	case formats::ARGB8888:
> +		addAlphaByte = true;
> +		[[fallthrough]];
>  	case formats::RGB888:
>  		break;
> +	case formats::XBGR8888:
> +	case formats::ABGR8888:
> +		addAlphaByte = true;
> +		[[fallthrough]];
>  	case formats::BGR888:
>  		/* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
>  		swapRedBlueGains_ = true;
> @@ -383,16 +426,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  	    isStandardBayerOrder(bayerFormat.order)) {
>  		switch (bayerFormat.bitDepth) {
>  		case 8:
> -			debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_BGR888<true> : &DebayerCpu::debayer8_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_BGR888<true> : &DebayerCpu::debayer8_GRGR_BGR888<false>;
>  			break;
>  		case 10:
> -			debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_BGR888<true> : &DebayerCpu::debayer10_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_BGR888<true> : &DebayerCpu::debayer10_GRGR_BGR888<false>;
>  			break;
>  		case 12:
> -			debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_BGR888<true> : &DebayerCpu::debayer12_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_BGR888<true> : &DebayerCpu::debayer12_GRGR_BGR888<false>;
>  			break;
>  		}
>  		setupStandardBayerOrder(bayerFormat.order);
> @@ -403,20 +446,20 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>  	    bayerFormat.packing == BayerFormat::Packing::CSI2) {
>  		switch (bayerFormat.order) {
>  		case BayerFormat::BGGR:
> -			debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
>  			return 0;
>  		case BayerFormat::GBRG:
> -			debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
>  			return 0;
>  		case BayerFormat::GRBG:
> -			debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
>  			return 0;
>  		case BayerFormat::RGGB:
> -			debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> -			debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> +			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
> +			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
>  			return 0;
>  		default:
>  			break;
> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
> index be7dcdca..1dac6435 100644
> --- a/src/libcamera/software_isp/debayer_cpu.h
> +++ b/src/libcamera/software_isp/debayer_cpu.h
> @@ -85,18 +85,28 @@ private:
>  	using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
>  
>  	/* 8-bit raw bayer format */
> +	template<bool addAlphaByte>
>  	void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>  	/* unpacked 10-bit raw bayer format */
> +	template<bool addAlphaByte>
>  	void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>  	/* unpacked 12-bit raw bayer format */
> +	template<bool addAlphaByte>
>  	void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>  	/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
> +	template<bool addAlphaByte>
>  	void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
> +	template<bool addAlphaByte>
>  	void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
>  
>  	struct DebayerInputConfig {
Kieran Bingham June 24, 2024, 11:49 a.m. UTC | #4
Quoting Hans de Goede (2024-06-24 10:37:52)
> Hi,
> 
> On 6/18/24 8:31 AM, Robert Mader wrote:
> > In order to be more compatible with modern hardware and APIs. This
> > notably allows GL implementations to directly import the buffers more
> > often and seems to be required for Wayland.
> > 
> > Further more, as we already enforce a 8 byte stride, these formats work
> > better for clients that don't support padding - such as libwebrtc at the
> > time of writing.
> > 
> > Tested devices:
> >  - Librem5
> >  - PinePhone
> >  - Thinkpad X13s
> > 
> > Signed-off-by: Robert Mader <robert.mader@collabora.com>
> > Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
> 
> Thanks, patch looks good to me:
> 
> Reviewed-by: Hans de Goede <hdegoede@redhat.com>

Thanks Hans,

I'm sorry I can't add your tag though as I merged this last week.
- https://git.libcamera.org/libcamera/libcamera.git/commit/?id=437e601653e69c82f5396979d99e7b9b5bb6086b

--
Kieran


> 
> Regards,
> 
> Hans
> 
> 
> 
> 
> > 
> > ---
> > 
> > Changes in v3:
> >  - Remove previously introduced variable again and use C++ templates
> >    instead in order to avoid runtime costs
> >  - Small cleanups and linting fixes
> > 
> > Changes in v2:
> >  - Reduce code duplication by using a runtime variable instead
> >  - Small cleanups
> > ---
> >  src/libcamera/software_isp/debayer_cpu.cpp | 75 +++++++++++++++++-----
> >  src/libcamera/software_isp/debayer_cpu.h   | 10 +++
> >  2 files changed, 69 insertions(+), 16 deletions(-)
> > 
> > diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> > index c038eed4..f8d2677d 100644
> > --- a/src/libcamera/software_isp/debayer_cpu.cpp
> > +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> > @@ -74,6 +74,8 @@ DebayerCpu::~DebayerCpu()
> >       *dst++ = blue_[curr[x] / (div)];                                                      \
> >       *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
> >       *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> > +     if constexpr (addAlphaByte)                                                           \
> > +             *dst++ = 255;                                                                 \
> >       x++;
> >  
> >  /*
> > @@ -85,6 +87,8 @@ DebayerCpu::~DebayerCpu()
> >       *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
> >       *dst++ = green_[curr[x] / (div)];                         \
> >       *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> > +     if constexpr (addAlphaByte)                               \
> > +             *dst++ = 255;                                     \
> >       x++;
> >  
> >  /*
> > @@ -96,6 +100,8 @@ DebayerCpu::~DebayerCpu()
> >       *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> >       *dst++ = green_[curr[x] / (div)];                          \
> >       *dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
> > +     if constexpr (addAlphaByte)                                \
> > +             *dst++ = 255;                                      \
> >       x++;
> >  
> >  /*
> > @@ -107,8 +113,11 @@ DebayerCpu::~DebayerCpu()
> >       *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> >       *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
> >       *dst++ = red_[curr[x] / (div)];                                                        \
> > +     if constexpr (addAlphaByte)                                                            \
> > +             *dst++ = 255;                                                                  \
> >       x++;
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       DECLARE_SRC_POINTERS(uint8_t)
> > @@ -119,6 +128,7 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       DECLARE_SRC_POINTERS(uint8_t)
> > @@ -129,6 +139,7 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       DECLARE_SRC_POINTERS(uint16_t)
> > @@ -140,6 +151,7 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       DECLARE_SRC_POINTERS(uint16_t)
> > @@ -151,6 +163,7 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       DECLARE_SRC_POINTERS(uint16_t)
> > @@ -162,6 +175,7 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       DECLARE_SRC_POINTERS(uint16_t)
> > @@ -173,6 +187,7 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       const int widthInBytes = window_.width * 5 / 4;
> > @@ -198,6 +213,7 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       const int widthInBytes = window_.width * 5 / 4;
> > @@ -218,6 +234,7 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       const int widthInBytes = window_.width * 5 / 4;
> > @@ -238,6 +255,7 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
> >       }
> >  }
> >  
> > +template<bool addAlphaByte>
> >  void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
> >  {
> >       const int widthInBytes = window_.width * 5 / 4;
> > @@ -280,7 +298,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
> >               config.bpp = (bayerFormat.bitDepth + 7) & ~7;
> >               config.patternSize.width = 2;
> >               config.patternSize.height = 2;
> > -             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> > +             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> > +                                                               formats::XRGB8888,
> > +                                                               formats::ARGB8888,
> > +                                                               formats::BGR888,
> > +                                                               formats::XBGR8888,
> > +                                                               formats::ABGR8888 });
> >               return 0;
> >       }
> >  
> > @@ -290,7 +313,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
> >               config.bpp = 10;
> >               config.patternSize.width = 4; /* 5 bytes per *4* pixels */
> >               config.patternSize.height = 2;
> > -             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
> > +             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
> > +                                                               formats::XRGB8888,
> > +                                                               formats::ARGB8888,
> > +                                                               formats::BGR888,
> > +                                                               formats::XBGR8888,
> > +                                                               formats::ABGR8888 });
> >               return 0;
> >       }
> >  
> > @@ -306,6 +334,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
> >               return 0;
> >       }
> >  
> > +     if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
> > +         outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
> > +             config.bpp = 32;
> > +             return 0;
> > +     }
> > +
> >       LOG(Debayer, Info)
> >               << "Unsupported output format " << outputFormat.toString();
> >       return -EINVAL;
> > @@ -341,6 +375,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
> >  {
> >       BayerFormat bayerFormat =
> >               BayerFormat::fromPixelFormat(inputFormat);
> > +     bool addAlphaByte = false;
> >  
> >       xShift_ = 0;
> >       swapRedBlueGains_ = false;
> > @@ -351,8 +386,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
> >       };
> >  
> >       switch (outputFormat) {
> > +     case formats::XRGB8888:
> > +     case formats::ARGB8888:
> > +             addAlphaByte = true;
> > +             [[fallthrough]];
> >       case formats::RGB888:
> >               break;
> > +     case formats::XBGR8888:
> > +     case formats::ABGR8888:
> > +             addAlphaByte = true;
> > +             [[fallthrough]];
> >       case formats::BGR888:
> >               /* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
> >               swapRedBlueGains_ = true;
> > @@ -383,16 +426,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
> >           isStandardBayerOrder(bayerFormat.order)) {
> >               switch (bayerFormat.bitDepth) {
> >               case 8:
> > -                     debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_BGR888<true> : &DebayerCpu::debayer8_BGBG_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_BGR888<true> : &DebayerCpu::debayer8_GRGR_BGR888<false>;
> >                       break;
> >               case 10:
> > -                     debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_BGR888<true> : &DebayerCpu::debayer10_BGBG_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_BGR888<true> : &DebayerCpu::debayer10_GRGR_BGR888<false>;
> >                       break;
> >               case 12:
> > -                     debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_BGR888<true> : &DebayerCpu::debayer12_BGBG_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_BGR888<true> : &DebayerCpu::debayer12_GRGR_BGR888<false>;
> >                       break;
> >               }
> >               setupStandardBayerOrder(bayerFormat.order);
> > @@ -403,20 +446,20 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
> >           bayerFormat.packing == BayerFormat::Packing::CSI2) {
> >               switch (bayerFormat.order) {
> >               case BayerFormat::BGGR:
> > -                     debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
> >                       return 0;
> >               case BayerFormat::GBRG:
> > -                     debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
> >                       return 0;
> >               case BayerFormat::GRBG:
> > -                     debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
> >                       return 0;
> >               case BayerFormat::RGGB:
> > -                     debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> > -                     debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> > +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
> > +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
> >                       return 0;
> >               default:
> >                       break;
> > diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
> > index be7dcdca..1dac6435 100644
> > --- a/src/libcamera/software_isp/debayer_cpu.h
> > +++ b/src/libcamera/software_isp/debayer_cpu.h
> > @@ -85,18 +85,28 @@ private:
> >       using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
> >  
> >       /* 8-bit raw bayer format */
> > +     template<bool addAlphaByte>
> >       void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +     template<bool addAlphaByte>
> >       void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> >       /* unpacked 10-bit raw bayer format */
> > +     template<bool addAlphaByte>
> >       void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +     template<bool addAlphaByte>
> >       void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> >       /* unpacked 12-bit raw bayer format */
> > +     template<bool addAlphaByte>
> >       void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +     template<bool addAlphaByte>
> >       void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> >       /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
> > +     template<bool addAlphaByte>
> >       void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +     template<bool addAlphaByte>
> >       void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +     template<bool addAlphaByte>
> >       void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +     template<bool addAlphaByte>
> >       void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
> >  
> >       struct DebayerInputConfig {
>
Hans de Goede June 24, 2024, 12:33 p.m. UTC | #5
Hi,

On 6/24/24 1:49 PM, Kieran Bingham wrote:
> Quoting Hans de Goede (2024-06-24 10:37:52)
>> Hi,
>>
>> On 6/18/24 8:31 AM, Robert Mader wrote:
>>> In order to be more compatible with modern hardware and APIs. This
>>> notably allows GL implementations to directly import the buffers more
>>> often and seems to be required for Wayland.
>>>
>>> Further more, as we already enforce a 8 byte stride, these formats work
>>> better for clients that don't support padding - such as libwebrtc at the
>>> time of writing.
>>>
>>> Tested devices:
>>>  - Librem5
>>>  - PinePhone
>>>  - Thinkpad X13s
>>>
>>> Signed-off-by: Robert Mader <robert.mader@collabora.com>
>>> Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
>>
>> Thanks, patch looks good to me:
>>
>> Reviewed-by: Hans de Goede <hdegoede@redhat.com>
> 
> Thanks Hans,
> 
> I'm sorry I can't add your tag though as I merged this last week.
> - https://git.libcamera.org/libcamera/libcamera.git/commit/?id=437e601653e69c82f5396979d99e7b9b5bb6086b

No worries, good that it is already merged :)

Regards,

Hans




>>> ---
>>>
>>> Changes in v3:
>>>  - Remove previously introduced variable again and use C++ templates
>>>    instead in order to avoid runtime costs
>>>  - Small cleanups and linting fixes
>>>
>>> Changes in v2:
>>>  - Reduce code duplication by using a runtime variable instead
>>>  - Small cleanups
>>> ---
>>>  src/libcamera/software_isp/debayer_cpu.cpp | 75 +++++++++++++++++-----
>>>  src/libcamera/software_isp/debayer_cpu.h   | 10 +++
>>>  2 files changed, 69 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
>>> index c038eed4..f8d2677d 100644
>>> --- a/src/libcamera/software_isp/debayer_cpu.cpp
>>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
>>> @@ -74,6 +74,8 @@ DebayerCpu::~DebayerCpu()
>>>       *dst++ = blue_[curr[x] / (div)];                                                      \
>>>       *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
>>>       *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>>> +     if constexpr (addAlphaByte)                                                           \
>>> +             *dst++ = 255;                                                                 \
>>>       x++;
>>>  
>>>  /*
>>> @@ -85,6 +87,8 @@ DebayerCpu::~DebayerCpu()
>>>       *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
>>>       *dst++ = green_[curr[x] / (div)];                         \
>>>       *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>>> +     if constexpr (addAlphaByte)                               \
>>> +             *dst++ = 255;                                     \
>>>       x++;
>>>  
>>>  /*
>>> @@ -96,6 +100,8 @@ DebayerCpu::~DebayerCpu()
>>>       *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>>>       *dst++ = green_[curr[x] / (div)];                          \
>>>       *dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
>>> +     if constexpr (addAlphaByte)                                \
>>> +             *dst++ = 255;                                      \
>>>       x++;
>>>  
>>>  /*
>>> @@ -107,8 +113,11 @@ DebayerCpu::~DebayerCpu()
>>>       *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>>>       *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
>>>       *dst++ = red_[curr[x] / (div)];                                                        \
>>> +     if constexpr (addAlphaByte)                                                            \
>>> +             *dst++ = 255;                                                                  \
>>>       x++;
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       DECLARE_SRC_POINTERS(uint8_t)
>>> @@ -119,6 +128,7 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       DECLARE_SRC_POINTERS(uint8_t)
>>> @@ -129,6 +139,7 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       DECLARE_SRC_POINTERS(uint16_t)
>>> @@ -140,6 +151,7 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       DECLARE_SRC_POINTERS(uint16_t)
>>> @@ -151,6 +163,7 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       DECLARE_SRC_POINTERS(uint16_t)
>>> @@ -162,6 +175,7 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       DECLARE_SRC_POINTERS(uint16_t)
>>> @@ -173,6 +187,7 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       const int widthInBytes = window_.width * 5 / 4;
>>> @@ -198,6 +213,7 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       const int widthInBytes = window_.width * 5 / 4;
>>> @@ -218,6 +234,7 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       const int widthInBytes = window_.width * 5 / 4;
>>> @@ -238,6 +255,7 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>>>       }
>>>  }
>>>  
>>> +template<bool addAlphaByte>
>>>  void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
>>>  {
>>>       const int widthInBytes = window_.width * 5 / 4;
>>> @@ -280,7 +298,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>>>               config.bpp = (bayerFormat.bitDepth + 7) & ~7;
>>>               config.patternSize.width = 2;
>>>               config.patternSize.height = 2;
>>> -             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
>>> +             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
>>> +                                                               formats::XRGB8888,
>>> +                                                               formats::ARGB8888,
>>> +                                                               formats::BGR888,
>>> +                                                               formats::XBGR8888,
>>> +                                                               formats::ABGR8888 });
>>>               return 0;
>>>       }
>>>  
>>> @@ -290,7 +313,12 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>>>               config.bpp = 10;
>>>               config.patternSize.width = 4; /* 5 bytes per *4* pixels */
>>>               config.patternSize.height = 2;
>>> -             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
>>> +             config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
>>> +                                                               formats::XRGB8888,
>>> +                                                               formats::ARGB8888,
>>> +                                                               formats::BGR888,
>>> +                                                               formats::XBGR8888,
>>> +                                                               formats::ABGR8888 });
>>>               return 0;
>>>       }
>>>  
>>> @@ -306,6 +334,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
>>>               return 0;
>>>       }
>>>  
>>> +     if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
>>> +         outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
>>> +             config.bpp = 32;
>>> +             return 0;
>>> +     }
>>> +
>>>       LOG(Debayer, Info)
>>>               << "Unsupported output format " << outputFormat.toString();
>>>       return -EINVAL;
>>> @@ -341,6 +375,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>>>  {
>>>       BayerFormat bayerFormat =
>>>               BayerFormat::fromPixelFormat(inputFormat);
>>> +     bool addAlphaByte = false;
>>>  
>>>       xShift_ = 0;
>>>       swapRedBlueGains_ = false;
>>> @@ -351,8 +386,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>>>       };
>>>  
>>>       switch (outputFormat) {
>>> +     case formats::XRGB8888:
>>> +     case formats::ARGB8888:
>>> +             addAlphaByte = true;
>>> +             [[fallthrough]];
>>>       case formats::RGB888:
>>>               break;
>>> +     case formats::XBGR8888:
>>> +     case formats::ABGR8888:
>>> +             addAlphaByte = true;
>>> +             [[fallthrough]];
>>>       case formats::BGR888:
>>>               /* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
>>>               swapRedBlueGains_ = true;
>>> @@ -383,16 +426,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>>>           isStandardBayerOrder(bayerFormat.order)) {
>>>               switch (bayerFormat.bitDepth) {
>>>               case 8:
>>> -                     debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_BGR888<true> : &DebayerCpu::debayer8_BGBG_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_BGR888<true> : &DebayerCpu::debayer8_GRGR_BGR888<false>;
>>>                       break;
>>>               case 10:
>>> -                     debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_BGR888<true> : &DebayerCpu::debayer10_BGBG_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_BGR888<true> : &DebayerCpu::debayer10_GRGR_BGR888<false>;
>>>                       break;
>>>               case 12:
>>> -                     debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_BGR888<true> : &DebayerCpu::debayer12_BGBG_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_BGR888<true> : &DebayerCpu::debayer12_GRGR_BGR888<false>;
>>>                       break;
>>>               }
>>>               setupStandardBayerOrder(bayerFormat.order);
>>> @@ -403,20 +446,20 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>>>           bayerFormat.packing == BayerFormat::Packing::CSI2) {
>>>               switch (bayerFormat.order) {
>>>               case BayerFormat::BGGR:
>>> -                     debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
>>>                       return 0;
>>>               case BayerFormat::GBRG:
>>> -                     debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
>>>                       return 0;
>>>               case BayerFormat::GRBG:
>>> -                     debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
>>>                       return 0;
>>>               case BayerFormat::RGGB:
>>> -                     debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
>>> -                     debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
>>> +                     debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
>>> +                     debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
>>>                       return 0;
>>>               default:
>>>                       break;
>>> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
>>> index be7dcdca..1dac6435 100644
>>> --- a/src/libcamera/software_isp/debayer_cpu.h
>>> +++ b/src/libcamera/software_isp/debayer_cpu.h
>>> @@ -85,18 +85,28 @@ private:
>>>       using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
>>>  
>>>       /* 8-bit raw bayer format */
>>> +     template<bool addAlphaByte>
>>>       void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
>>> +     template<bool addAlphaByte>
>>>       void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>>>       /* unpacked 10-bit raw bayer format */
>>> +     template<bool addAlphaByte>
>>>       void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
>>> +     template<bool addAlphaByte>
>>>       void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>>>       /* unpacked 12-bit raw bayer format */
>>> +     template<bool addAlphaByte>
>>>       void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
>>> +     template<bool addAlphaByte>
>>>       void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>>>       /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
>>> +     template<bool addAlphaByte>
>>>       void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
>>> +     template<bool addAlphaByte>
>>>       void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
>>> +     template<bool addAlphaByte>
>>>       void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
>>> +     template<bool addAlphaByte>
>>>       void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
>>>  
>>>       struct DebayerInputConfig {
>>
>

Patch
diff mbox series

diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
index c038eed4..f8d2677d 100644
--- a/src/libcamera/software_isp/debayer_cpu.cpp
+++ b/src/libcamera/software_isp/debayer_cpu.cpp
@@ -74,6 +74,8 @@  DebayerCpu::~DebayerCpu()
 	*dst++ = blue_[curr[x] / (div)];                                                      \
 	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
 	*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
+	if constexpr (addAlphaByte)                                                           \
+		*dst++ = 255;                                                                 \
 	x++;
 
 /*
@@ -85,6 +87,8 @@  DebayerCpu::~DebayerCpu()
 	*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
 	*dst++ = green_[curr[x] / (div)];                         \
 	*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
+	if constexpr (addAlphaByte)                               \
+		*dst++ = 255;                                     \
 	x++;
 
 /*
@@ -96,6 +100,8 @@  DebayerCpu::~DebayerCpu()
 	*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
 	*dst++ = green_[curr[x] / (div)];                          \
 	*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
+	if constexpr (addAlphaByte)                                \
+		*dst++ = 255;                                      \
 	x++;
 
 /*
@@ -107,8 +113,11 @@  DebayerCpu::~DebayerCpu()
 	*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
 	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
 	*dst++ = red_[curr[x] / (div)];                                                        \
+	if constexpr (addAlphaByte)                                                            \
+		*dst++ = 255;                                                                  \
 	x++;
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint8_t)
@@ -119,6 +128,7 @@  void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint8_t)
@@ -129,6 +139,7 @@  void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -140,6 +151,7 @@  void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -151,6 +163,7 @@  void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -162,6 +175,7 @@  void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -173,6 +187,7 @@  void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -198,6 +213,7 @@  void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -218,6 +234,7 @@  void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -238,6 +255,7 @@  void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+template<bool addAlphaByte>
 void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -280,7 +298,12 @@  int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
 		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
 		config.patternSize.width = 2;
 		config.patternSize.height = 2;
-		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
+		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
+								  formats::XRGB8888,
+								  formats::ARGB8888,
+								  formats::BGR888,
+								  formats::XBGR8888,
+								  formats::ABGR8888 });
 		return 0;
 	}
 
@@ -290,7 +313,12 @@  int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
 		config.bpp = 10;
 		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
 		config.patternSize.height = 2;
-		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
+		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
+								  formats::XRGB8888,
+								  formats::ARGB8888,
+								  formats::BGR888,
+								  formats::XBGR8888,
+								  formats::ABGR8888 });
 		return 0;
 	}
 
@@ -306,6 +334,12 @@  int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
 		return 0;
 	}
 
+	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
+	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
+		config.bpp = 32;
+		return 0;
+	}
+
 	LOG(Debayer, Info)
 		<< "Unsupported output format " << outputFormat.toString();
 	return -EINVAL;
@@ -341,6 +375,7 @@  int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 {
 	BayerFormat bayerFormat =
 		BayerFormat::fromPixelFormat(inputFormat);
+	bool addAlphaByte = false;
 
 	xShift_ = 0;
 	swapRedBlueGains_ = false;
@@ -351,8 +386,16 @@  int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 	};
 
 	switch (outputFormat) {
+	case formats::XRGB8888:
+	case formats::ARGB8888:
+		addAlphaByte = true;
+		[[fallthrough]];
 	case formats::RGB888:
 		break;
+	case formats::XBGR8888:
+	case formats::ABGR8888:
+		addAlphaByte = true;
+		[[fallthrough]];
 	case formats::BGR888:
 		/* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
 		swapRedBlueGains_ = true;
@@ -383,16 +426,16 @@  int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 	    isStandardBayerOrder(bayerFormat.order)) {
 		switch (bayerFormat.bitDepth) {
 		case 8:
-			debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_BGR888<true> : &DebayerCpu::debayer8_BGBG_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_BGR888<true> : &DebayerCpu::debayer8_GRGR_BGR888<false>;
 			break;
 		case 10:
-			debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_BGR888<true> : &DebayerCpu::debayer10_BGBG_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_BGR888<true> : &DebayerCpu::debayer10_GRGR_BGR888<false>;
 			break;
 		case 12:
-			debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_BGR888<true> : &DebayerCpu::debayer12_BGBG_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_BGR888<true> : &DebayerCpu::debayer12_GRGR_BGR888<false>;
 			break;
 		}
 		setupStandardBayerOrder(bayerFormat.order);
@@ -403,20 +446,20 @@  int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 	    bayerFormat.packing == BayerFormat::Packing::CSI2) {
 		switch (bayerFormat.order) {
 		case BayerFormat::BGGR:
-			debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
 			return 0;
 		case BayerFormat::GBRG:
-			debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
 			return 0;
 		case BayerFormat::GRBG:
-			debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_BGR888<true> : &DebayerCpu::debayer10P_GRGR_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_BGR888<true> : &DebayerCpu::debayer10P_BGBG_BGR888<false>;
 			return 0;
 		case BayerFormat::RGGB:
-			debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_BGR888<true> : &DebayerCpu::debayer10P_RGRG_BGR888<false>;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_BGR888<true> : &DebayerCpu::debayer10P_GBGB_BGR888<false>;
 			return 0;
 		default:
 			break;
diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
index be7dcdca..1dac6435 100644
--- a/src/libcamera/software_isp/debayer_cpu.h
+++ b/src/libcamera/software_isp/debayer_cpu.h
@@ -85,18 +85,28 @@  private:
 	using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
 
 	/* 8-bit raw bayer format */
+	template<bool addAlphaByte>
 	void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	template<bool addAlphaByte>
 	void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
 	/* unpacked 10-bit raw bayer format */
+	template<bool addAlphaByte>
 	void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	template<bool addAlphaByte>
 	void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
 	/* unpacked 12-bit raw bayer format */
+	template<bool addAlphaByte>
 	void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	template<bool addAlphaByte>
 	void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
 	/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
+	template<bool addAlphaByte>
 	void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	template<bool addAlphaByte>
 	void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+	template<bool addAlphaByte>
 	void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
+	template<bool addAlphaByte>
 	void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
 
 	struct DebayerInputConfig {