diff --git a/include/libcamera/internal/v4l2_videodevice.h b/include/libcamera/internal/v4l2_videodevice.h
index 4d21f5a01ec8..06c65e863a99 100644
--- a/include/libcamera/internal/v4l2_videodevice.h
+++ b/include/libcamera/internal/v4l2_videodevice.h
@@ -168,6 +168,8 @@ public:
 class V4L2VideoDevice : public V4L2Device
 {
 public:
+	using Formats = ImageFormats<V4L2PixelFormat>;
+
 	explicit V4L2VideoDevice(const std::string &deviceNode);
 	explicit V4L2VideoDevice(const MediaEntity *entity);
 	V4L2VideoDevice(const V4L2VideoDevice &) = delete;
@@ -187,7 +189,7 @@ public:

 	int getFormat(V4L2DeviceFormat *format);
 	int setFormat(V4L2DeviceFormat *format);
-	std::map<V4L2PixelFormat, std::vector<SizeRange>> formats(uint32_t code = 0);
+	Formats formats(uint32_t code = 0);

 	int setSelection(unsigned int target, Rectangle *rect);

diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
index 9d887b706c3f..ffb3c5cc45d9 100644
--- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
+++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
@@ -37,8 +37,6 @@ namespace libcamera {

 LOG_DEFINE_CATEGORY(RPI)

-using V4L2PixFmtMap = std::map<V4L2PixelFormat, std::vector<SizeRange>>;
-
 namespace {

 bool isRaw(PixelFormat &pixFmt)
@@ -67,7 +65,7 @@ double scoreFormat(double desired, double actual)
 	return score;
 }

-V4L2DeviceFormat findBestMode(V4L2PixFmtMap &formatsMap, const Size &req)
+V4L2DeviceFormat findBestMode(V4L2VideoDevice::Formats &formatsMap, const Size &req)
 {
 	double bestScore = 9e9, score;
 	V4L2DeviceFormat bestMode = {};
@@ -79,7 +77,7 @@ V4L2DeviceFormat findBestMode(V4L2PixFmtMap &formatsMap, const Size &req)
 #define PENALTY_UNPACKED	 500.0

 	/* Calculate the closest/best mode from the user requested size. */
-	for (const auto &iter : formatsMap) {
+	for (const auto &iter : formatsMap.data()) {
 		V4L2PixelFormat v4l2Format = iter.first;
 		PixelFormat pixelFormat = v4l2Format.toPixelFormat();
 		const PixelFormatInfo &info = PixelFormatInfo::info(pixelFormat);
@@ -427,7 +425,7 @@ CameraConfiguration::Status RPiCameraConfiguration::validate()
 			 * Calculate the best sensor mode we can use based on
 			 * the user request.
 			 */
-			V4L2PixFmtMap fmts = data_->unicam_[Unicam::Image].dev()->formats();
+			V4L2VideoDevice::Formats fmts = data_->unicam_[Unicam::Image].dev()->formats();
 			V4L2DeviceFormat sensorFormat = findBestMode(fmts, cfg.size);
 			PixelFormat sensorPixFormat = sensorFormat.fourcc.toPixelFormat();
 			if (cfg.size != sensorFormat.size ||
@@ -481,14 +479,14 @@ CameraConfiguration::Status RPiCameraConfiguration::validate()
 		 *
 		 */
 		PixelFormat &cfgPixFmt = config_.at(outSize[i].first).pixelFormat;
-		V4L2PixFmtMap fmts;
+		V4L2VideoDevice::Formats fmts;

 		if (i == maxIndex)
 			fmts = data_->isp_[Isp::Output0].dev()->formats();
 		else
 			fmts = data_->isp_[Isp::Output1].dev()->formats();

-		if (fmts.find(V4L2PixelFormat::fromPixelFormat(cfgPixFmt, false)) == fmts.end()) {
+		if (!fmts.contains(V4L2PixelFormat::fromPixelFormat(cfgPixFmt, false))) {
 			/* If we cannot find a native format, use a default one. */
 			cfgPixFmt = formats::NV12;
 			status = Adjusted;
@@ -518,9 +516,9 @@ CameraConfiguration *PipelineHandlerRPi::generateConfiguration(Camera *camera,
 	RPiCameraData *data = cameraData(camera);
 	CameraConfiguration *config = new RPiCameraConfiguration(data);
 	V4L2DeviceFormat sensorFormat;
+	V4L2VideoDevice::Formats fmts;
 	unsigned int bufferCount;
 	PixelFormat pixelFormat;
-	V4L2PixFmtMap fmts;
 	Size size;

 	if (roles.empty())
@@ -580,8 +578,9 @@ CameraConfiguration *PipelineHandlerRPi::generateConfiguration(Camera *camera,

 		/* Translate the V4L2PixelFormat to PixelFormat. */
 		std::map<PixelFormat, std::vector<SizeRange>> deviceFormats;
-		std::transform(fmts.begin(), fmts.end(), std::inserter(deviceFormats, deviceFormats.end()),
-			       [&](const decltype(fmts)::value_type &format) {
+		std::transform(fmts.data().begin(), fmts.data().end(),
+			       std::inserter(deviceFormats, deviceFormats.end()),
+			       [&](const auto &format) {
 					return decltype(deviceFormats)::value_type{
 						format.first.toPixelFormat(),
 						format.second
@@ -638,7 +637,7 @@ int PipelineHandlerRPi::configure(Camera *camera, CameraConfiguration *config)
 	}

 	/* First calculate the best sensor mode we can use based on the user request. */
-	V4L2PixFmtMap fmts = data->unicam_[Unicam::Image].dev()->formats();
+	V4L2VideoDevice::Formats fmts = data->unicam_[Unicam::Image].dev()->formats();
 	V4L2DeviceFormat sensorFormat = findBestMode(fmts, rawStream ? sensorSize : maxSize);

 	/*
diff --git a/src/libcamera/pipeline/simple/converter.cpp b/src/libcamera/pipeline/simple/converter.cpp
index e5e2f0fddb62..701db96138b0 100644
--- a/src/libcamera/pipeline/simple/converter.cpp
+++ b/src/libcamera/pipeline/simple/converter.cpp
@@ -85,7 +85,9 @@ std::vector<PixelFormat> SimpleConverter::formats(PixelFormat input)

 	std::vector<PixelFormat> pixelFormats;

-	for (const auto &format : m2m_->capture()->formats()) {
+	/* Hold the Formats in a local so the map iterated below outlives the loop. */
+	const V4L2VideoDevice::Formats captureFormats = m2m_->capture()->formats();
+	for (const auto &format : captureFormats.data()) {
 		PixelFormat pixelFormat = format.first.toPixelFormat();
 		if (pixelFormat)
 			pixelFormats.push_back(pixelFormat);
diff --git a/src/libcamera/pipeline/simple/simple.cpp b/src/libcamera/pipeline/simple/simple.cpp
index 1ec8d0f7de03..202a7e85375c 100644
--- a/src/libcamera/pipeline/simple/simple.cpp
+++ b/src/libcamera/pipeline/simple/simple.cpp
@@ -275,13 +275,12 @@ int SimpleCameraData::init()
 			return ret;
 		}

-		std::map<V4L2PixelFormat, std::vector<SizeRange>> videoFormats =
-			video_->formats(format.mbus_code);
+		V4L2VideoDevice::Formats videoFormats = video_->formats(format.mbus_code);

 		LOG(SimplePipeline, Debug)
 			<< "Adding configuration for " << format.size.toString()
 			<< " in pixel formats [ "
-			<< utils::join(videoFormats, ", ",
+			<< utils::join(videoFormats.data(), ", ",
 				       [](const auto &f) {
 					       return f.first.toString();
 				       })
@@ -294,7 +293,7 @@ int SimpleCameraData::init()
 		 * handler currently doesn't care about how a particular
 		 * PixelFormat is achieved.
 		 */
-		for (const auto &videoFormat : videoFormats) {
+		for (const auto &videoFormat : videoFormats.data()) {
 			PixelFormat pixelFormat = videoFormat.first.toPixelFormat();
 			if (!pixelFormat)
 				continue;
diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
index 80a0e77ba3fc..4c7812d0fb6b 100644
--- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
+++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
@@ -158,12 +158,11 @@ CameraConfiguration *PipelineHandlerUVC::generateConfiguration(Camera *camera,
 	if (roles.empty())
 		return config;

-	std::map<V4L2PixelFormat, std::vector<SizeRange>> v4l2Formats =
-		data->video_->formats();
+	V4L2VideoDevice::Formats v4l2Formats = data->video_->formats();
 	std::map<PixelFormat, std::vector<SizeRange>> deviceFormats;
-	std::transform(v4l2Formats.begin(), v4l2Formats.end(),
+	std::transform(v4l2Formats.data().begin(), v4l2Formats.data().end(),
 		       std::inserter(deviceFormats, deviceFormats.begin()),
-		       [&](const decltype(v4l2Formats)::value_type &format) {
+		       [&](const auto &format) {
 			       return decltype(deviceFormats)::value_type{
 				       format.first.toPixelFormat(),
 				       format.second
diff --git a/src/libcamera/v4l2_videodevice.cpp b/src/libcamera/v4l2_videodevice.cpp
index 3614b2ed1cbc..63e09cf9616a 100644
--- a/src/libcamera/v4l2_videodevice.cpp
+++ b/src/libcamera/v4l2_videodevice.cpp
@@ -461,6 +461,12 @@ const std::string V4L2DeviceFormat::toString() const
  * \context This class is \threadbound.
  */

+/**
+ * \typedef V4L2VideoDevice::Formats
+ * \brief An ImageFormats specialization mapping V4L2PixelFormat instances to
+ * image resolutions
+ */
+
 /**
  * \brief Construct a V4L2VideoDevice
  * \param[in] deviceNode The file-system path to the video device node
@@ -925,23 +931,24 @@ int V4L2VideoDevice::setFormatSingleplane(V4L2DeviceFormat *format)
  *
  * \return A list of the supported video device formats
  */
-std::map<V4L2PixelFormat, std::vector<SizeRange>> V4L2VideoDevice::formats(uint32_t code)
+V4L2VideoDevice::Formats V4L2VideoDevice::formats(uint32_t code)
 {
-	std::map<V4L2PixelFormat, std::vector<SizeRange>> formats;
+	Formats formats;

 	for (V4L2PixelFormat pixelFormat : enumPixelformats(code)) {
 		std::vector<SizeRange> sizes = enumSizes(pixelFormat);
 		if (sizes.empty())
 			return {};

-		if (formats.find(pixelFormat) != formats.end()) {
+		/* Fail only if this pixel format has already been added. */
+		if (formats.contains(pixelFormat)) {
 			LOG(V4L2, Error)
 				<< "Could not add sizes for pixel format "
 				<< pixelFormat;
 			return {};
 		}

-		formats.emplace(pixelFormat, sizes);
+		formats.addFormat(pixelFormat, sizes);
 	}

 	return formats;
