diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
index c038eed4..73c66a88 100644
--- a/src/libcamera/software_isp/debayer_cpu.cpp
+++ b/src/libcamera/software_isp/debayer_cpu.cpp
@@ -76,6 +76,13 @@ DebayerCpu::~DebayerCpu()
 	*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
 	x++;
 
+#define BGGR_XBGR8888(p, n, div)                                                              \
+	*dst++ = blue_[curr[x] / (div)]; /* centre sample via blue_ table */                  \
+	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \
+	*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
+	*dst++ = 255; /* constant fourth byte */                                              \
+	x++; /* next sample; p/n are the left/right neighbour offsets */
+
 /*
  * GBG
  * RGR
@@ -87,6 +94,13 @@ DebayerCpu::~DebayerCpu()
 	*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
 	x++;
 
+#define GRBG_XBGR8888(p, n, div)                                  \
+	*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \
+	*dst++ = green_[curr[x] / (div)]; /* centre sample */     \
+	*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
+	*dst++ = 255; /* constant fourth byte */                  \
+	x++; /* next sample; p/n are the left/right neighbour offsets */
+
 /*
  * GRG
  * BGB
@@ -98,6 +112,13 @@ DebayerCpu::~DebayerCpu()
 	*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
 	x++;
 
+#define GBRG_XBGR8888(p, n, div)                                   \
+	*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
+	*dst++ = green_[curr[x] / (div)]; /* centre sample */      \
+	*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \
+	*dst++ = 255; /* constant fourth byte */                   \
+	x++; /* next sample; p/n are the left/right neighbour offsets */
+
 /*
  * BGB
  * GRG
@@ -109,6 +130,13 @@ DebayerCpu::~DebayerCpu()
 	*dst++ = red_[curr[x] / (div)];                                                        \
 	x++;
 
+#define RGGB_XBGR8888(p, n, div)                                                               \
+	*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
+	*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \
+	*dst++ = red_[curr[x] / (div)]; /* centre sample via red_ table */                     \
+	*dst++ = 255; /* constant fourth byte */                                               \
+	x++; /* next sample; p/n are the left/right neighbour offsets */
+
 void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint8_t)
@@ -119,6 +147,16 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer8_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	DECLARE_SRC_POINTERS(uint8_t)
+	int x = 0; /* advanced inside each per-pixel macro */
+	while (x < static_cast<int>(window_.width)) {
+		BGGR_XBGR8888(1, 1, 1)
+		GBRG_XBGR8888(1, 1, 1)
+	}
+}
+
 void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint8_t)
@@ -129,6 +167,16 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer8_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	DECLARE_SRC_POINTERS(uint8_t)
+	int x = 0; /* advanced inside each per-pixel macro */
+	while (x < static_cast<int>(window_.width)) {
+		GRBG_XBGR8888(1, 1, 1)
+		RGGB_XBGR8888(1, 1, 1)
+	}
+}
+
 void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -140,6 +188,17 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer10_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	DECLARE_SRC_POINTERS(uint16_t)
+	int x = 0; /* advanced inside each per-pixel macro */
+	while (x < static_cast<int>(window_.width)) {
+		/* div = 4 brings the 10-bit samples down to 8-bit values */
+		BGGR_XBGR8888(1, 1, 4)
+		GBRG_XBGR8888(1, 1, 4)
+	}
+}
+
 void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -151,6 +210,17 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer10_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	DECLARE_SRC_POINTERS(uint16_t)
+	int x = 0; /* advanced inside each per-pixel macro */
+	while (x < static_cast<int>(window_.width)) {
+		/* div = 4 brings the 10-bit samples down to 8-bit values */
+		GRBG_XBGR8888(1, 1, 4)
+		RGGB_XBGR8888(1, 1, 4)
+	}
+}
+
 void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -162,6 +232,17 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer12_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	DECLARE_SRC_POINTERS(uint16_t)
+	int x = 0; /* advanced inside each per-pixel macro */
+	while (x < static_cast<int>(window_.width)) {
+		/* div = 16 brings the 12-bit samples down to 8-bit values */
+		BGGR_XBGR8888(1, 1, 16)
+		GBRG_XBGR8888(1, 1, 16)
+	}
+}
+
 void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	DECLARE_SRC_POINTERS(uint16_t)
@@ -173,6 +254,17 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer12_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	DECLARE_SRC_POINTERS(uint16_t)
+	int x = 0; /* advanced inside each per-pixel macro */
+	while (x < static_cast<int>(window_.width)) {
+		/* div = 16 brings the 12-bit samples down to 8-bit values */
+		GRBG_XBGR8888(1, 1, 16)
+		RGGB_XBGR8888(1, 1, 16)
+	}
+}
+
 void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -198,6 +290,31 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer10P_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	const uint8_t *prev = src[0];
+	const uint8_t *curr = src[1];
+	const uint8_t *next = src[2];
+	const int packedLineBytes = window_.width * 5 / 4;
+	int x = 0; /* byte index into the packed lines */
+	/*
+	 * Every 5th byte carries the least-significant bits of 4 pixels, so
+	 * the first pixel of a group finds its left neighbour at x - 2 and
+	 * the last pixel of a group finds its right neighbour at x + 2.
+	 */
+	while (x < packedLineBytes) {
+		/* Pixel 0: left neighbour sits two bytes back (p = 2) */
+		BGGR_XBGR8888(2, 1, 1)
+		/* Pixel 1: pattern alternates from BGGR to GBRG */
+		GBRG_XBGR8888(1, 1, 1)
+		/* Pixels 2 and 3: same pair, last one looks two bytes ahead */
+		BGGR_XBGR8888(1, 1, 1)
+		GBRG_XBGR8888(1, 2, 1)
+		/* Step over the 5th byte holding 4 x 2 least-significant bits */
+		++x;
+	}
+}
+
 void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -218,6 +335,26 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer10P_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	const uint8_t *prev = src[0];
+	const uint8_t *curr = src[1];
+	const uint8_t *next = src[2];
+	const int packedLineBytes = window_.width * 5 / 4;
+	int x = 0; /* byte index into the packed lines */
+	while (x < packedLineBytes) {
+		/* Pixel 0: left neighbour sits two bytes back (p = 2) */
+		GRBG_XBGR8888(2, 1, 1)
+		/* Pixel 1: pattern alternates from GRBG to RGGB */
+		RGGB_XBGR8888(1, 1, 1)
+		/* Pixels 2 and 3: same pair, last one looks two bytes ahead */
+		GRBG_XBGR8888(1, 1, 1)
+		RGGB_XBGR8888(1, 2, 1)
+		/* Step over the 5th byte holding 4 x 2 least-significant bits */
+		++x;
+	}
+}
+
 void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -238,6 +375,26 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer10P_GBGB_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	const uint8_t *prev = src[0];
+	const uint8_t *curr = src[1];
+	const uint8_t *next = src[2];
+	const int packedLineBytes = window_.width * 5 / 4;
+	int x = 0; /* byte index into the packed lines */
+	while (x < packedLineBytes) {
+		/* Pixel 0: left neighbour sits two bytes back (p = 2) */
+		GBRG_XBGR8888(2, 1, 1)
+		/* Pixel 1: pattern alternates from GBRG to BGGR */
+		BGGR_XBGR8888(1, 1, 1)
+		/* Pixels 2 and 3: same pair, last one looks two bytes ahead */
+		GBRG_XBGR8888(1, 1, 1)
+		BGGR_XBGR8888(1, 2, 1)
+		/* Step over the 5th byte holding 4 x 2 least-significant bits */
+		++x;
+	}
+}
+
 void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
 {
 	const int widthInBytes = window_.width * 5 / 4;
@@ -258,6 +415,26 @@ void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
 	}
 }
 
+void DebayerCpu::debayer10P_RGRG_XBGR8888(uint8_t *dst, const uint8_t *src[])
+{
+	const uint8_t *prev = src[0];
+	const uint8_t *curr = src[1];
+	const uint8_t *next = src[2];
+	const int packedLineBytes = window_.width * 5 / 4;
+	int x = 0; /* byte index into the packed lines */
+	while (x < packedLineBytes) {
+		/* Pixel 0: left neighbour sits two bytes back (p = 2) */
+		RGGB_XBGR8888(2, 1, 1)
+		/* Pixel 1: pattern alternates from RGGB to GRBG */
+		GRBG_XBGR8888(1, 1, 1)
+		/* Pixels 2 and 3: same pair, last one looks two bytes ahead */
+		RGGB_XBGR8888(1, 1, 1)
+		GRBG_XBGR8888(1, 2, 1)
+		/* Step over the 5th byte holding 4 x 2 least-significant bits */
+		++x;
+	}
+}
+
 static bool isStandardBayerOrder(BayerFormat::Order order)
 {
 	return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||
@@ -280,7 +457,14 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
 		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
 		config.patternSize.width = 2;
 		config.patternSize.height = 2;
-		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
+		config.outputFormats = std::vector<PixelFormat>({
+			formats::RGB888,
+			formats::XRGB8888, /* RGB888 handling plus a fourth byte */
+			formats::ARGB8888, /* RGB888 handling plus a fourth byte */
+			formats::BGR888,
+			formats::XBGR8888, /* BGR888 handling plus a fourth byte */
+			formats::ABGR8888 /* BGR888 handling plus a fourth byte */
+		});
 		return 0;
 	}
 
@@ -290,7 +474,14 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
 		config.bpp = 10;
 		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
 		config.patternSize.height = 2;
-		config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
+		config.outputFormats = std::vector<PixelFormat>({
+			formats::RGB888,
+			formats::XRGB8888, /* RGB888 handling plus a fourth byte */
+			formats::ARGB8888, /* RGB888 handling plus a fourth byte */
+			formats::BGR888,
+			formats::XBGR8888, /* BGR888 handling plus a fourth byte */
+			formats::ABGR8888 /* BGR888 handling plus a fourth byte */
+		});
 		return 0;
 	}
 
@@ -306,6 +497,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
 		return 0;
 	}
 
+	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
+	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
+		config.bpp = 32; /* three colour bytes plus the X/A byte */
+		return 0;
+	}
+
 	LOG(Debayer, Info)
 		<< "Unsupported output format " << outputFormat.toString();
 	return -EINVAL;
@@ -341,6 +538,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 {
 	BayerFormat bayerFormat =
 		BayerFormat::fromPixelFormat(inputFormat);
+	bool addAlphaByte = false; /* set for the 32-bit X/A output formats */
 
 	xShift_ = 0;
 	swapRedBlueGains_ = false;
@@ -351,8 +549,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 	};
 
 	switch (outputFormat) {
+	case formats::XRGB8888:
+	case formats::ARGB8888:
+		addAlphaByte = true; /* otherwise handled exactly like RGB888 */
+		[[fallthrough]];
 	case formats::RGB888:
 		break;
+	case formats::XBGR8888:
+	case formats::ABGR8888:
+		addAlphaByte = true; /* otherwise handled exactly like BGR888 */
+		[[fallthrough]];
 	case formats::BGR888:
 		/* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
 		swapRedBlueGains_ = true;
@@ -383,16 +589,19 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 	    isStandardBayerOrder(bayerFormat.order)) {
 		switch (bayerFormat.bitDepth) {
 		case 8:
-			debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
+			LOG(Debayer, Debug) << "8bit no packing";
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer8_BGBG_XBGR8888 : &DebayerCpu::debayer8_BGBG_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer8_GRGR_XBGR8888 : &DebayerCpu::debayer8_GRGR_BGR888;
 			break;
 		case 10:
-			debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
+			LOG(Debayer, Debug) << "10bit no packing";
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10_BGBG_XBGR8888 : &DebayerCpu::debayer10_BGBG_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10_GRGR_XBGR8888 : &DebayerCpu::debayer10_GRGR_BGR888;
 			break;
 		case 12:
-			debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
+			LOG(Debayer, Debug) << "12bit no packing";
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer12_BGBG_XBGR8888 : &DebayerCpu::debayer12_BGBG_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer12_GRGR_XBGR8888 : &DebayerCpu::debayer12_GRGR_BGR888;
 			break;
 		}
 		setupStandardBayerOrder(bayerFormat.order);
@@ -401,22 +610,23 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
 
 	if (bayerFormat.bitDepth == 10 &&
 	    bayerFormat.packing == BayerFormat::Packing::CSI2) {
+		LOG(Debayer, Debug) << "10bit csi2";
 		switch (bayerFormat.order) {
 		case BayerFormat::BGGR:
-			debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_XBGR8888 : &DebayerCpu::debayer10P_BGBG_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_XBGR8888 : &DebayerCpu::debayer10P_GRGR_BGR888;
 			return 0;
 		case BayerFormat::GBRG:
-			debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_XBGR8888 : &DebayerCpu::debayer10P_GBGB_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_XBGR8888 : &DebayerCpu::debayer10P_RGRG_BGR888;
 			return 0;
 		case BayerFormat::GRBG:
-			debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_GRGR_XBGR8888 : &DebayerCpu::debayer10P_GRGR_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_BGBG_XBGR8888 : &DebayerCpu::debayer10P_BGBG_BGR888;
 			return 0;
 		case BayerFormat::RGGB:
-			debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
-			debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
+			debayer0_ = addAlphaByte ? &DebayerCpu::debayer10P_RGRG_XBGR8888 : &DebayerCpu::debayer10P_RGRG_BGR888;
+			debayer1_ = addAlphaByte ? &DebayerCpu::debayer10P_GBGB_XBGR8888 : &DebayerCpu::debayer10P_GBGB_BGR888;
 			return 0;
 		default:
 			break;
@@ -533,6 +743,8 @@ DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size
 	/* round up to multiple of 8 for 64 bits alignment */
 	unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;
 
+	LOG(Debayer, Debug) << outputFormat.toString() << " " << size.width << " " << size.height << " " << config.bpp << " " << stride << " " << stride * size.height;
+
 	return std::make_tuple(stride, stride * size.height);
 }
 
diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
index be7dcdca..c30f44aa 100644
--- a/src/libcamera/software_isp/debayer_cpu.h
+++ b/src/libcamera/software_isp/debayer_cpu.h
@@ -86,18 +86,28 @@ private:
 
 	/* 8-bit raw bayer format */
 	void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer8_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer8_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	/* unpacked 10-bit raw bayer format */
 	void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer10_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer10_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	/* unpacked 12-bit raw bayer format */
 	void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer12_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer12_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
 	void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer10P_BGBG_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer10P_GRGR_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer10P_GBGB_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 	void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
+	void debayer10P_RGRG_XBGR8888(uint8_t *dst, const uint8_t *src[]);
 
 	struct DebayerInputConfig {
 		Size patternSize;
