diff --git a/include/libcamera/internal/v4l2_subdevice.h b/include/libcamera/internal/v4l2_subdevice.h
index 69862de0585a..576faf971a05 100644
--- a/include/libcamera/internal/v4l2_subdevice.h
+++ b/include/libcamera/internal/v4l2_subdevice.h
@@ -20,6 +20,7 @@
 
 #include <libcamera/color_space.h>
 #include <libcamera/geometry.h>
+#include <libcamera/transform.h>
 
 #include "libcamera/internal/formats.h"
 #include "libcamera/internal/media_object.h"
@@ -44,6 +45,7 @@ struct V4L2SubdeviceFormat {
 	uint32_t mbus_code;
 	Size size;
 	std::optional<ColorSpace> colorSpace;
+	Transform transform = Transform::Identity;
 
 	const std::string toString() const;
 	uint8_t bitsPerPixel() const;
diff --git a/src/libcamera/camera_sensor.cpp b/src/libcamera/camera_sensor.cpp
index 3518d3e3b02a..6d5c2317e0fe 100644
--- a/src/libcamera/camera_sensor.cpp
+++ b/src/libcamera/camera_sensor.cpp
@@ -750,6 +750,7 @@ V4L2SubdeviceFormat CameraSensor::getFormat(const std::vector<unsigned int> &mbu
 		.mbus_code = bestCode,
 		.size = *bestSize,
 		.colorSpace = ColorSpace::Raw,
+		.transform = Transform::Identity,
 	};
 
 	return format;
@@ -759,12 +760,34 @@ V4L2SubdeviceFormat CameraSensor::getFormat(const std::vector<unsigned int> &mbu
  * \brief Set the sensor output format
  * \param[in] format The desired sensor output format
  *
+ * If flips are writable they are configured according to the desired Transform.
+ * Transform::Identity always corresponds to H/V flip being disabled if the
+ * controls are writable. Flips are set before the new format is applied as
+ * they can effectively change the Bayer pattern ordering.
+ *
  * The ranges of any controls associated with the sensor are also updated.
  *
  * \return 0 on success or a negative error code otherwise
  */
 int CameraSensor::setFormat(V4L2SubdeviceFormat *format)
 {
+	/* Configure flips if the sensor supports that. */
+	if (supportFlips_) {
+		ControlList flipCtrls(subdev_->controls());
+
+		flipCtrls.set(V4L2_CID_HFLIP,
+			      static_cast<int32_t>(!!(format->transform &
+						      Transform::HFlip)));
+		flipCtrls.set(V4L2_CID_VFLIP,
+			      static_cast<int32_t>(!!(format->transform &
+						      Transform::VFlip)));
+
+		int ret = subdev_->setControls(&flipCtrls);
+		if (ret)
+			return ret;
+	}
+
+	/* Apply format on the subdev. */
 	int ret = subdev_->setFormat(pad_, format);
 	if (ret)
 		return ret;
diff --git a/src/libcamera/pipeline/ipu3/cio2.cpp b/src/libcamera/pipeline/ipu3/cio2.cpp
index d4e523af24b4..a819884f762d 100644
--- a/src/libcamera/pipeline/ipu3/cio2.cpp
+++ b/src/libcamera/pipeline/ipu3/cio2.cpp
@@ -15,6 +15,7 @@
 #include <libcamera/formats.h>
 #include <libcamera/geometry.h>
 #include <libcamera/stream.h>
+#include <libcamera/transform.h>
 
 #include "libcamera/internal/camera_sensor.h"
 #include "libcamera/internal/framebuffer.h"
@@ -177,10 +178,12 @@ int CIO2Device::init(const MediaDevice *media, unsigned int index)
 /**
  * \brief Configure the CIO2 unit
  * \param[in] size The requested CIO2 output frame size
+ * \param[in] transform The transformation to be applied on the image sensor
  * \param[out] outputFormat The CIO2 unit output image format
  * \return 0 on success or a negative error code otherwise
  */
-int CIO2Device::configure(const Size &size, V4L2DeviceFormat *outputFormat)
+int CIO2Device::configure(const Size &size, const Transform &transform,
+			  V4L2DeviceFormat *outputFormat)
 {
 	V4L2SubdeviceFormat sensorFormat;
 	int ret;
@@ -191,6 +194,7 @@ int CIO2Device::configure(const Size &size, V4L2DeviceFormat *outputFormat)
 	 */
 	std::vector<unsigned int> mbusCodes = utils::map_keys(mbusCodesToPixelFormat);
 	sensorFormat = getSensorFormat(mbusCodes, size);
+	sensorFormat.transform = transform;
 	ret = sensor_->setFormat(&sensorFormat);
 	if (ret)
 		return ret;
diff --git a/src/libcamera/pipeline/ipu3/cio2.h b/src/libcamera/pipeline/ipu3/cio2.h
index 68504a2da89d..bbd87eb8ceb6 100644
--- a/src/libcamera/pipeline/ipu3/cio2.h
+++ b/src/libcamera/pipeline/ipu3/cio2.h
@@ -26,6 +26,7 @@ class Request;
 class Size;
 class SizeRange;
 struct StreamConfiguration;
+enum class Transform;
 
 class CIO2Device
 {
@@ -38,7 +39,8 @@ public:
 	std::vector<SizeRange> sizes(const PixelFormat &format) const;
 
 	int init(const MediaDevice *media, unsigned int index);
-	int configure(const Size &size, V4L2DeviceFormat *outputFormat);
+	int configure(const Size &size, const Transform &transform,
+		      V4L2DeviceFormat *outputFormat);
 
 	StreamConfiguration generateConfiguration(Size size) const;
 
diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp b/src/libcamera/pipeline/ipu3/ipu3.cpp
index a424ac914859..3a569c7e0031 100644
--- a/src/libcamera/pipeline/ipu3/ipu3.cpp
+++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
@@ -51,7 +51,7 @@ class IPU3CameraData : public Camera::Private
 {
 public:
 	IPU3CameraData(PipelineHandler *pipe)
-		: Camera::Private(pipe), supportsFlips_(false)
+		: Camera::Private(pipe)
 	{
 	}
 
@@ -73,7 +73,6 @@ public:
 	Stream rawStream_;
 
 	Rectangle cropRegion_;
-	bool supportsFlips_;
 	Transform rotationTransform_;
 
 	std::unique_ptr<DelayedControls> delayedCtrls_;
@@ -539,7 +538,7 @@ int PipelineHandlerIPU3::configure(Camera *camera, CameraConfiguration *c)
 	 */
 	const Size &sensorSize = config->cio2Format().size;
 	V4L2DeviceFormat cio2Format;
-	ret = cio2->configure(sensorSize, &cio2Format);
+	ret = cio2->configure(sensorSize, config->combinedTransform_, &cio2Format);
 	if (ret)
 		return ret;
 
@@ -547,24 +546,6 @@ int PipelineHandlerIPU3::configure(Camera *camera, CameraConfiguration *c)
 	cio2->sensor()->sensorInfo(&sensorInfo);
 	data->cropRegion_ = sensorInfo.analogCrop;
 
-	/*
-	 * Configure the H/V flip controls based on the combination of
-	 * the sensor and user transform.
-	 */
-	if (data->supportsFlips_) {
-		ControlList sensorCtrls(cio2->sensor()->controls());
-		sensorCtrls.set(V4L2_CID_HFLIP,
-				static_cast<int32_t>(!!(config->combinedTransform_
-							& Transform::HFlip)));
-		sensorCtrls.set(V4L2_CID_VFLIP,
-				static_cast<int32_t>(!!(config->combinedTransform_
-						        & Transform::VFlip)));
-
-		ret = cio2->sensor()->setControls(&sensorCtrls);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * If the ImgU gets configured, its driver seems to expect that
 	 * buffers will be queued to its outputs, as otherwise the next
@@ -1127,11 +1108,6 @@ int PipelineHandlerIPU3::registerCameras()
 			LOG(IPU3, Warning) << "Invalid rotation of " << rotationValue
 					   << " degrees: ignoring";
 
-		ControlList ctrls = cio2->sensor()->getControls({ V4L2_CID_HFLIP });
-		if (!ctrls.empty())
-			/* We assume the sensor supports VFLIP too. */
-			data->supportsFlips_ = true;
-
 		/**
 		 * \todo Dynamically assign ImgU and output devices to each
 		 * stream and camera; as of now, limit support to two cameras
diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
index c086a69a913d..d8232ff8e065 100644
--- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
+++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
@@ -691,24 +691,12 @@ int PipelineHandlerRPi::configure(Camera *camera, CameraConfiguration *config)
 		}
 	}
 
-	/*
-	 * Configure the H/V flip controls based on the combination of
-	 * the sensor and user transform.
-	 */
-	if (data->supportsFlips_) {
-		const RPiCameraConfiguration *rpiConfig =
-			static_cast<const RPiCameraConfiguration *>(config);
-		ControlList controls;
-
-		controls.set(V4L2_CID_HFLIP,
-			     static_cast<int32_t>(!!(rpiConfig->combinedTransform_ & Transform::HFlip)));
-		controls.set(V4L2_CID_VFLIP,
-			     static_cast<int32_t>(!!(rpiConfig->combinedTransform_ & Transform::VFlip)));
-		data->setSensorControls(controls);
-	}
-
 	/* First calculate the best sensor mode we can use based on the user request. */
 	V4L2SubdeviceFormat sensorFormat = findBestFormat(data->sensorFormats_, rawStream ? sensorSize : maxSize, bitDepth);
+	/* Apply any cached transform. */
+	const RPiCameraConfiguration *rpiConfig = static_cast<const RPiCameraConfiguration *>(config);
+	sensorFormat.transform = rpiConfig->combinedTransform_;
+	/* Finally apply the format on the sensor. */
 	ret = data->sensor_->setFormat(&sensorFormat);
 	if (ret)
 		return ret;
@@ -1293,10 +1281,9 @@ int PipelineHandlerRPi::registerCamera(MediaDevice *unicam, MediaDevice *isp, Me
 	 * We cache three things about the sensor in relation to transforms
 	 * (meaning horizontal and vertical flips).
 	 *
-	 * Firstly, does it support them?
-	 * Secondly, if you use them does it affect the Bayer ordering?
-	 * Thirdly, what is the "native" Bayer order, when no transforms are
-	 * applied?
+	 * If flips are supported verify if they affect the Bayer ordering
+	 * and what the "native" Bayer order is, when no transforms are
+	 * applied.
 	 *
 	 * We note that the sensor's cached list of supported formats is
 	 * already in the "native" order, with any flips having been undone.
diff --git a/src/libcamera/v4l2_subdevice.cpp b/src/libcamera/v4l2_subdevice.cpp
index 15e8206a915c..38ff8b0c605b 100644
--- a/src/libcamera/v4l2_subdevice.cpp
+++ b/src/libcamera/v4l2_subdevice.cpp
@@ -216,6 +216,13 @@ const std::map<uint32_t, V4L2SubdeviceFormatInfo> formatInfoMap = {
  * resulting color space is acceptable.
  */
 
+/**
+ * \var V4L2SubdeviceFormat::transform
+ * \brief The transform (vertical/horizontal flips) to be applied on the subdev
+ *
+ * Default initialized to Identity (no transform).
+ */
+
 /**
  * \brief Assemble and return a string describing the format
  * \return A string describing the V4L2SubdeviceFormat
