[libcamera-devel,v2,1/3] gstreamer: Add sensor mode selection

Message ID 20230324181247.302586-2-nicolas@ndufresne.ca
State New
Series
  • Add sensor mode selection to GStreamer

Commit Message

Nicolas Dufresne March 24, 2023, 6:12 p.m. UTC
From: Nicolas Dufresne <nicolas.dufresne@collabora.com>

This adds support for selecting the sensor mode. A new read-only
property called sensor-modes is filled when the element reaches the
READY state. It contains the list of all available sensor modes,
including the supported framerate range. This is exposed as GstCaps
in the form of sensor/mode,width=X,height=Y,format=F,framerate=[...].
The format string matches the libcamera format string representation.

The application can then select a mode using the read/write sensor-mode
property. The selected mode is also expressed as caps; it is intersected
with the supported modes and the "best" match is picked. This allows
applications to use a simple filter when they want to pick a mode for,
let's say, a specific framerate (e.g. sensor/mode,framerate=60/1).
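
For instance (illustrative pipeline, using the property added here):

  gst-launch-1.0 libcamerasrc sensor-mode="sensor/mode,framerate=60/1" ! videoconvert ! autovideosink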

Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
---
 src/gstreamer/gstlibcamera-utils.cpp |   4 +
 src/gstreamer/gstlibcamerasrc.cpp    | 110 ++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 2 deletions(-)

Comments

Robert Mader March 24, 2023, 6:24 p.m. UTC | #1
Hi,

On 24.03.23 19:12, Nicolas Dufresne via libcamera-devel wrote:
> From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
>
> This adds support for selecting the sensor mode. A new read-only
> property called sensor-modes is filled when the element reaches the
> READY state. It contains the list of all available sensor modes,
> including the supported framerate range. This is exposed as GstCaps
> in the form of sensor/mode,width=X,height=Y,format=F,framerate=[...].
> The format string matches the libcamera format string representation.
>
> The application can then select a mode using the read/write sensor-mode
> property. The selected mode is also expressed as caps; it is intersected
> with the supported modes and the "best" match is picked. This allows
> applications to use a simple filter when they want to pick a mode for,
> let's say, a specific framerate (e.g. sensor/mode,framerate=60/1).
>
> Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> ---
>   src/gstreamer/gstlibcamera-utils.cpp |   4 +
>   src/gstreamer/gstlibcamerasrc.cpp    | 110 ++++++++++++++++++++++++++-
>   2 files changed, 112 insertions(+), 2 deletions(-)
>
> diff --git a/src/gstreamer/gstlibcamera-utils.cpp b/src/gstreamer/gstlibcamera-utils.cpp
> index 750ec351..c8a8df09 100644
> --- a/src/gstreamer/gstlibcamera-utils.cpp
> +++ b/src/gstreamer/gstlibcamera-utils.cpp
> @@ -416,6 +416,10 @@ gst_libcamera_configure_stream_from_caps(StreamConfiguration &stream_cfg,
>   		stream_cfg.pixelFormat = gst_format_to_pixel_format(gst_format);
>   	} else if (gst_structure_has_name(s, "image/jpeg")) {
>   		stream_cfg.pixelFormat = formats::MJPEG;
> +	} else if (gst_structure_has_name(s, "sensor/mode")) {
> +		gst_structure_fixate_field(s, "format");
> +		const gchar *format = gst_structure_get_string(s, "format");
> +		stream_cfg.pixelFormat = PixelFormat::fromString(format);
>   	} else {
>   		g_critical("Unsupported media type: %s", gst_structure_get_name(s));
>   	}
> diff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp
> index a10cbd4f..2f05a03f 100644
> --- a/src/gstreamer/gstlibcamerasrc.cpp
> +++ b/src/gstreamer/gstlibcamerasrc.cpp
> @@ -147,6 +147,9 @@ struct _GstLibcameraSrc {
>   
>   	gchar *camera_name;
>   
> +	GstCaps *sensor_modes;
> +	GstCaps *sensor_mode;
> +
>   	GstLibcameraSrcState *state;
>   	GstLibcameraAllocator *allocator;
>   	GstFlowCombiner *flow_combiner;
> @@ -154,7 +157,10 @@ struct _GstLibcameraSrc {
>   
>   enum {
>   	PROP_0,
> -	PROP_CAMERA_NAME
> +	PROP_CAMERA_NAME,
> +	PROP_SENSOR_MODES,
> +	PROP_SENSOR_MODE,
> +	PROP_LAST
>   };
>   
>   G_DEFINE_TYPE_WITH_CODE(GstLibcameraSrc, gst_libcamera_src, GST_TYPE_ELEMENT,
> @@ -318,6 +324,61 @@ int GstLibcameraSrcState::processRequest()
>   	return err;
>   }
>   
> +static GstCaps *
> +gst_libcamera_src_enumerate_sensor_modes(GstLibcameraSrc *self)
> +{
> +	GstCaps *modes = gst_caps_new_empty();
> +	GstLibcameraSrcState *state = self->state;
> +	auto config = state->cam_->generateConfiguration({ libcamera::StreamRole::Raw,
> +							   libcamera::StreamRole::VideoRecording });
> +	if (config == nullptr) {
> +		GST_DEBUG_OBJECT(self, "Sensor mode selection is not supported, skipping enumeration.");
> +		return modes;
> +	}

I gave this a short try on both my USB camera and on a PPP. In both
cases enumeration gets skipped (as intended), but in the latter case the
following warning gets printed:

 > ERROR RkISP1 rkisp1.cpp:684 Can't capture both raw and processed streams

So I wonder if we can come up with a better check here.

When allowing the enumeration by only selecting Raw or VideoRecording I
made the following observations:

 1. enumeration on the USB camera is very slow, it takes several seconds
 2. on the PPP / RkISP1, even though there are many more modes, the
    enumeration is super fast - probably a fraction of a second. So a
    method like this could probably be used for framerate enumeration.
 3. the mode selection via sensor-mode=sensor/mode,... usually fails
    though (haven't dug deeper into why yet)

Regards,

Robert

> +
> +	const libcamera::StreamFormats &formats = config->at(0).formats();
> +
> +	for (const auto &pixfmt : formats.pixelformats()) {
> +		for (const auto &size : formats.sizes(pixfmt)) {
> +			config->at(0).size = size;
> +			config->at(0).pixelFormat = pixfmt;
> +
> +			if (config->validate() == CameraConfiguration::Invalid)
> +				continue;
> +
> +			if (state->cam_->configure(config.get()))
> +				continue;
> +
> +			auto fd_ctrl = state->cam_->controls().find(&controls::FrameDurationLimits);
> +			if (fd_ctrl == state->cam_->controls().end())
> +				continue;
> +
> +			int minrate_num, minrate_denom;
> +			int maxrate_num, maxrate_denom;
> +			double min_framerate = gst_util_guint64_to_gdouble(1.0e6) /
> +					       gst_util_guint64_to_gdouble(fd_ctrl->second.max().get<int64_t>());
> +			double max_framerate = gst_util_guint64_to_gdouble(1.0e6) /
> +					       gst_util_guint64_to_gdouble(fd_ctrl->second.min().get<int64_t>());
> +			gst_util_double_to_fraction(min_framerate, &minrate_num, &minrate_denom);
> +			gst_util_double_to_fraction(max_framerate, &maxrate_num, &maxrate_denom);
> +
> +			GstStructure *s = gst_structure_new("sensor/mode",
> +							    "format", G_TYPE_STRING, pixfmt.toString().c_str(),
> +							    "width", G_TYPE_INT, size.width,
> +							    "height", G_TYPE_INT, size.height,
> +							    "framerate", GST_TYPE_FRACTION_RANGE,
> +							    minrate_num, minrate_denom,
> +							    maxrate_num, maxrate_denom,
> +							    nullptr);
> +			gst_caps_append_structure(modes, s);
> +		}
> +	}
> +
> +	GST_DEBUG_OBJECT(self, "Camera sensor modes: %" GST_PTR_FORMAT, modes);
> +
> +	return modes;
> +}
> +
>   static bool
>   gst_libcamera_src_open(GstLibcameraSrc *self)
>   {
> @@ -375,6 +436,7 @@ gst_libcamera_src_open(GstLibcameraSrc *self)
>   	/* No need to lock here, we didn't start our threads yet. */
>   	self->state->cm_ = cm;
>   	self->state->cam_ = cam;
> +	self->sensor_modes = gst_libcamera_src_enumerate_sensor_modes(self);
>   
>   	return true;
>   }
> @@ -462,6 +524,7 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
>   	GstLibcameraSrcState *state = self->state;
>   	GstFlowReturn flow_ret = GST_FLOW_OK;
>   	gint ret;
> +	g_autoptr(GstCaps) sensor_mode = nullptr;
>   
>   	g_autoptr(GstStructure) element_caps = gst_structure_new_empty("caps");
>   
> @@ -481,6 +544,16 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
>   		roles.push_back(gst_libcamera_pad_get_role(srcpad));
>   	}
>   
> +	if (!gst_caps_is_any(self->sensor_mode)) {
> +		sensor_mode = gst_caps_intersect(self->sensor_mode, self->sensor_modes);
> +		if (!gst_caps_is_empty(sensor_mode)) {
> +			roles.push_back(libcamera::StreamRole::Raw);
> +		} else {
> +			GST_WARNING_OBJECT(self, "No sensor mode matching the selection, ignoring.");
> +			gst_clear_caps(&sensor_mode);
> +		}
> +	}
> +
>   	/* Generate the stream configurations, there should be one per pad. */
>   	state->config_ = state->cam_->generateConfiguration(roles);
>   	if (state->config_ == nullptr) {
> @@ -490,7 +563,7 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
>   		gst_task_stop(task);
>   		return;
>   	}
> -	g_assert(state->config_->size() == state->srcpads_.size());
> +	g_assert(state->config_->size() == state->srcpads_.size() + (!!sensor_mode));
>   
>   	for (gsize i = 0; i < state->srcpads_.size(); i++) {
>   		GstPad *srcpad = state->srcpads_[i];
> @@ -510,6 +583,12 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
>   		gst_libcamera_get_framerate_from_caps(caps, element_caps);
>   	}
>   
> +	if (sensor_mode) {
> +		StreamConfiguration &stream_cfg = state->config_->at(state->srcpads_.size());
> +		g_assert(gst_caps_is_writable(sensor_mode));
> +		gst_libcamera_configure_stream_from_caps(stream_cfg, sensor_mode);
> +	}
> +
>   	if (flow_ret != GST_FLOW_OK)
>   		goto done;
>   
> @@ -624,6 +703,7 @@ gst_libcamera_src_task_leave([[maybe_unused]] GstTask *task,
>   	g_clear_object(&self->allocator);
>   	g_clear_pointer(&self->flow_combiner,
>   			(GDestroyNotify)gst_flow_combiner_free);
> +	gst_clear_caps(&self->sensor_modes);
>   }
>   
>   static void
> @@ -659,6 +739,10 @@ gst_libcamera_src_set_property(GObject *object, guint prop_id,
>   		g_free(self->camera_name);
>   		self->camera_name = g_value_dup_string(value);
>   		break;
> +	case PROP_SENSOR_MODE:
> +		gst_clear_caps(&self->sensor_mode);
> +		self->sensor_mode = GST_CAPS(g_value_dup_boxed(value));
> +		break;
>   	default:
>   		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
>   		break;
> @@ -676,6 +760,12 @@ gst_libcamera_src_get_property(GObject *object, guint prop_id, GValue *value,
>   	case PROP_CAMERA_NAME:
>   		g_value_set_string(value, self->camera_name);
>   		break;
> +	case PROP_SENSOR_MODES:
> +		g_value_set_boxed(value, self->sensor_modes);
> +		break;
> +	case PROP_SENSOR_MODE:
> +		g_value_set_boxed(value, self->sensor_mode);
> +		break;
>   	default:
>   		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
>   		break;
> @@ -763,6 +853,7 @@ gst_libcamera_src_init(GstLibcameraSrc *self)
>   	/* C-style friend. */
>   	state->src_ = self;
>   	self->state = state;
> +	self->sensor_mode = gst_caps_new_any();
>   }
>   
>   static GstPad *
> @@ -844,4 +935,19 @@ gst_libcamera_src_class_init(GstLibcameraSrcClass *klass)
>   							     | G_PARAM_READWRITE
>   							     | G_PARAM_STATIC_STRINGS));
>   	g_object_class_install_property(object_class, PROP_CAMERA_NAME, spec);
> +
> +	spec = g_param_spec_boxed("sensor-modes", "Sensor Modes",
> +				  "GstCaps representing available sensor modes.",
> +				  GST_TYPE_CAPS,
> +				  (GParamFlags)(G_PARAM_READABLE
> +					        | G_PARAM_STATIC_STRINGS));
> +	g_object_class_install_property(object_class, PROP_SENSOR_MODES, spec);
> +
> +	spec = g_param_spec_boxed("sensor-mode", "Sensor Mode",
> +				  "GstCaps representing selected sensor mode.",
> +				  GST_TYPE_CAPS,
> +				  (GParamFlags)(GST_PARAM_MUTABLE_READY
> +					        | G_PARAM_READWRITE
> +					        | G_PARAM_STATIC_STRINGS));
> +	g_object_class_install_property(object_class, PROP_SENSOR_MODE, spec);
>   }
Nicolas Dufresne March 24, 2023, 7:11 p.m. UTC | #2
Le vendredi 24 mars 2023 à 18:24 +0000, Robert Mader via libcamera-devel a
écrit :
> Hi,
> On 24.03.23 19:12, Nicolas Dufresne via libcamera-devel wrote:
>  
> > [...]
> I gave this a short try on both my USB camera and on a PPP. In both cases
> enumeration gets skipped (as intended), but in the latter case the following
> warning gets printed:
> > ERROR RkISP1 rkisp1.cpp:684 Can't capture both raw and processed streams
> So I wonder if we can come up with a better check here.
> 
The fact that the IPAs / pipeline handlers throw stuff on the terminal should
probably not justify changing the API usage. The logging needs to be adapted so
it does not pretend there has been an error on behalf of the application using
the API. To me it seems evident that apps will try different numbers of roles
until the camera accepts the configuration.

> When allowing the enumeration by only selecting Raw or VideoRecording I made
> the following observations:
>    1. enumeration on the USB camera is very slow, it takes several seconds
>    2. on the PPP / RkISP1, even though there are many more modes, the
>       enumeration is super fast - probably a fraction of a second. So a method
>       like this could probably be used for framerate enumeration.
>    3. the mode selection via sensor-mode=sensor/mode,... usually fails though
>       (haven't dug deeper into why yet)

It fails because of "ERROR RkISP1 rkisp1.cpp:684 Can't capture both raw and
processed streams".

In general, I don't think it is useful to enumerate the modes on the PPP,
because the method used to select the sensor mode cannot be supported (you
can't pass buffers through the ISP while capturing the raw stream). More
generally, this just highlights that the sensor mode selection API is
currently just a hack, as selecting sensor modes on the PPP seems rather
important. Currently, only IPU3 and RPi have a sensor/ISP model that works
with the API.

Nicolas

Jacopo Mondi March 27, 2023, 9:21 a.m. UTC | #3
Hi Nicolas,

On Fri, Mar 24, 2023 at 02:12:45PM -0400, Nicolas Dufresne via libcamera-devel wrote:
> [...]
> +static GstCaps *
> +gst_libcamera_src_enumerate_sensor_modes(GstLibcameraSrc *self)
> +{
> +	GstCaps *modes = gst_caps_new_empty();
> +	GstLibcameraSrcState *state = self->state;
> +	auto config = state->cam_->generateConfiguration({ libcamera::StreamRole::Raw,
> +							   libcamera::StreamRole::VideoRecording });

This clearly shows our API falls short, as the presence of the RAW
stream, and the ability to capture it alongside other streams, is
conditional on the platform's capabilities. So this can't be
considered a sufficiently stable way of retrieving information about
the sensor's configuration. Not your fault, we currently don't have
anything better to offer.

We currently have a way to report all the possible configurations for
a stream using the StreamFormat class. StreamFormat is currently
nothing but

	std::map<PixelFormat, std::vector<SizeRange>> formats_;

which gets populated by the pipeline handler while creating
the StreamConfiguration items which will be part of the generated
CameraConfiguration.

That's for the "Camera-provided information to applications" side.

On the "application to configure the Camera" side, StreamConfiguration
is used to tell the library how a Stream should be configured.
These are the fields a StreamConfiguration describes:

	PixelFormat pixelFormat;
	Size size;
	unsigned int stride;
	unsigned int frameSize;

	unsigned int bufferCount;

	std::optional<ColorSpace> colorSpace;

Of these, pixelFormat, size, bufferCount and (optionally) colorSpace
are expected to be set by the application.

We have been discussing adding a frameDuration to StreamConfiguration
for a long time. It is needed for some parts of the Android HAL as
well, and in general, as your patch shows, it is a desirable feature.
A similar reasoning can be made for the sensor's configuration, even
if this has received several pushbacks in the past when RPi wanted
something similar. Furthermore, I would be tempted to consider the
possible FrameDuration a property of the sensor's configuration, but I
presume that advanced use cases such as StillCapture chain more stages
to the canonical frame processing, so it's better to have the
FrameDurations separate from the sensor configuration to allow
describing additional delays there.

Summing it all up, we have StreamFormats to convey back to applications
what a Stream can do, and StreamConfiguration to set the Stream
characteristics.

Addressing the needs you have here would require:

- Pipeline handlers to populate StreamFormats not only with
  <PixelFormat, vector<SizeRange>> but also with the possible sensor
  configurations that can generate that mode and the possible frame
  durations

- Applications to populate a StreamConfiguration with the desired
  FrameDuration and sensor mode

- Pipeline handlers to validate that the desired sensor mode and
  durations are compatible with the stream characteristics
  (as an example, if your pipeline can't upscale, you can't have a
  sensor mode with a size smaller than the desired output).

This puts more complexity on the pipeline side, in both the generation
of configurations and in their validation, but would such an API
address your needs in your opinion? Have you had any thoughts on how
you would like to have this handled?
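
For illustration, a rough sketch of what such extended structures
could look like (hypothetical names and fields, not current libcamera
API; PixelFormat and Size are the existing libcamera types):

	/* Hypothetical sketch only, not current libcamera API. */
	struct SensorMode {
		PixelFormat format;       /* raw format produced by the sensor */
		Size size;                /* full mode resolution */
		int64_t minFrameDuration; /* microseconds */
		int64_t maxFrameDuration; /* microseconds */
	};

	struct StreamConfiguration {
		/* Existing fields: pixelFormat, size, stride, frameSize,
		 * bufferCount, colorSpace, ... */

		/* Possible additions, set by the application and checked by
		 * the pipeline handler in validate(): */
		std::optional<int64_t> frameDuration;  /* desired duration, in us */
		std::optional<SensorMode> sensorMode;  /* desired sensor mode */
	};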

That's really an open question for everyone, as I guess this part is
becoming critical as this patch shows and the workaround we pushed RPi
to is really not generic enough.

One more point here below

Thanks
   j

-------------------------------------------------------------------------------
Slightly unrelated point, but while you're here I would like to pick
your brain on the issue: the way StreamFormats are currently created is
slightly under-specified. The documentation goes to great lengths in
describing how the sizes associated with a PixelFormat can be inspected
[1] but it only superficially describes how a pipeline handler should
populate the formats associated with a stream configuration [2]. I'll
give two examples:
- if the StreamConfiguration is for a 1080p Viewfinder stream, should
  the associated StreamFormats list resolutions > 1080p?
- if the StreamConfiguration is for a !Raw role, should RAW
  pixelformats be listed in the StreamFormats?

There are arguments for both cases, but I guess the big picture
question is about how an application should expect to retrieve the
full camera capabilities:
- Does an application expect to receive only the available formats for
  the role it has asked a configuration for?
- Does an application expect to receive all the possible formats a
  camera can produce regardless of the size/pixelformat of the
  StreamConfiguration that has been generated?

As a concrete example, currently the RkISP1 pipeline handler lists a
RAW StreamFormat for all configurations, regardless of the role. I had
a patch to "fix" this but I've been pointed to the fact that the API was
not very well specified.

What would your expectations be in this case?

[1] https://libcamera.org/api-html/classlibcamera_1_1StreamFormats.html
[2] https://libcamera.org/api-html/structlibcamera_1_1StreamConfiguration.html#a37725e6b9e179ca80be0e57ed97857d3
-------------------------------------------------------------------------------


Nicolas Dufresne March 27, 2023, 1:40 p.m. UTC | #4
Le lundi 27 mars 2023 à 11:21 +0200, Jacopo Mondi a écrit :
> Hi Nicolas,
> 
> On Fri, Mar 24, 2023 at 02:12:45PM -0400, Nicolas Dufresne via libcamera-devel wrote:
> > [...]
> > +static GstCaps *
> > +gst_libcamera_src_enumerate_sensor_modes(GstLibcameraSrc *self)
> > +{
> > +	GstCaps *modes = gst_caps_new_empty();
> > +	GstLibcameraSrcState *state = self->state;
> > +	auto config = state->cam_->generateConfiguration({ libcamera::StreamRole::Raw,
> > +							   libcamera::StreamRole::VideoRecording });
> 
> This clearly shows our API falls short, as the presence of the RAW
> stream, and the ability to capture it alongside other streams, is
> conditional on the platform's capabilities. So this can't be
> considered a sufficiently stable way of retrieving information about
> the sensor's configuration. Not your fault, we currently don't have
> anything better to offer.
> 
> [...]
> We have been discussing adding a frameDuration to StreamConfiguration
> for a long time. It is needed for some parts of the Android HAL as
> well, and in general, as your patch shows, it is a desirable feature.
> A similar reasoning can be made for the sensor's configuration, even
> if this has received several pushbacks in the past when RPi wanted
> something similar. Furthermore, I would be tempted to consider the
> possible FrameDuration a property of the sensor's configuration, but I
> presume that advanced use cases such as StillCapture chain more stages
> to the canonical frame processing, so it's better to have the
> FrameDurations separate from the sensor configuration to allow
> describing additional delays there.

Just a note here: what matters to applications is not so much the final frame
duration (framerate) but the maximum imposed by the selected camera mode. An
application will simply want to advertise, let's say, 60fps if there is a mode
that can theoretically achieve it. At the moment this falls short: even if
such a mode exists, it may not be picked unless you are lucky.

> 
> Summing it all up, we have StreamFormats to convey back to applications
> what a Stream can do, and StreamConfiguration to set the Stream
> characteristics.
> 
> Addressing the needs you have here would require:
> 
> - Pipeline handlers to populate StreamFormats not only with
>   <PixelFormat, vector<SizeRange>> but also with the possible sensor
>   configurations that can generate that mode and the possible frame
>   durations
> 
> - Applications to populate a StreamConfiguration with the desired
>   FrameDuration and sensor mode
> 
> - Pipeline handlers to validate that the desired sensor mode and
>   durations are compatible with the stream characteristics
>   (as an example, if your pipeline can't upscale, you can't have a
>   sensor mode with a size smaller than the desired output).
> 
> This puts more complexity on the pipeline side, in both the generation
> of configurations and in their validation, but would such an API
> address your needs in your opinion? Have you had any thoughts on how
> you would like to have this handled?

Currently, stepping back from the implementation details, I identify two
issues.

1. The inability for the pipeline to do the right thing.

As the pipeline is not aware of the desired frameDuration(s) at the time of
validation, it is unable to do the right thing for an application that wants,
let's say, 60fps when the "best" mode does not cover it.

2. The lack of direct sensor mode enumeration.

This is what you describe, but my learning is that a sensor mode description
goes beyond format/width/height/frameDuration. For an application, it is
imperative to know what portion of the original field of view will remain,
and what the initial field of view is. I'm not sure any of this actually
exists, nor that I have collected all the details.

So, if I read your proposal well, you suggest adding all supported camera
modes to each of the enumerated format/size entries. As the camera
configuration becomes a real object, we then have the ability to introduce
more optional metadata there to solve 2. I think that could work, as there is
certainly a strong correlation between the format/size and the available
modes. Making a flat list of modes like libcamera-apps do would be more
difficult though (apps would have to deal with duplicates). Is that a concern?
(It's not from my point of view: we can use immutable shared pointers in C++
to do this at no cost, and we could even define that pointer comparison is
enough to match duplicates.)
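
A minimal sketch of that idea (illustrative types, assuming a SensorConfig
class exists):

	/* Illustrative only: share immutable sensor configs between entries
	 * and deduplicate by pointer identity. */
	#include <algorithm>
	#include <memory>
	#include <vector>

	using SensorConfigPtr = std::shared_ptr<const SensorConfig>;

	std::vector<SensorConfigPtr>
	uniqueModes(const std::vector<SensorConfigPtr> &modes)
	{
		std::vector<SensorConfigPtr> unique;
		for (const SensorConfigPtr &mode : modes) {
			/* shared_ptr operator== compares the stored pointers,
			 * which is enough to match duplicates here. */
			if (std::find(unique.begin(), unique.end(), mode) == unique.end())
				unique.push_back(mode);
		}
		return unique;
	}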

> 
> That's really an open question for everyone, as I guess this part is
> becoming critical as this patch shows and the workaround we pushed RPi
> to is really not generic enough.

Agreed, and looking for feedback. I'd be happy to help with the subject. I
know this patch only makes an issue more visible, but I'm also sharing it as
it may temporarily be useful to some users (as a downstream patch).

> 
> One more point here below
> 
> Thanks
>    j
> 
> -------------------------------------------------------------------------------
> Slightly unrelated point, but while you're here I would like to pick
> your brain on the issue: the way StreamFormats are currently created is
> slightly under-specified. The documentation goes to great lengths in
> describing how the sizes associated with a PixelFormat can be inspected
> [1] but it only superficially describes how a pipeline handler should
> populate the formats associated with a stream configuration [2]. I'll
> give two examples:
> - if the StreamConfiguration is for a 1080p Viewfinder stream, should
>   the associated StreamFormats list resolutions > 1080p?

If you have scaling in your ISP, I think it should. I notice that feeding the
ISP with a higher resolution often leads to a cleaner image. An application
can then decide to focus on reducing the bandwidth (and possibly improving
battery life as a side effect) by matching the resolution.

The other reason is that your viewfinder will likely match the display
resolution. If you do viewfinder + still pictures, and you don't want the
viewfinder going blank while taking a picture, you will have no choice but to
set the sensor to the desired still picture resolution.

Finally, if you don't expose the higher resolutions, a generic caps mechanism
like GstCaps can't be used. When you have multiple streams, you will want to
intersect the sensor configurations of every stream in order to find the
remaining available sensor modes you can pick from. GstCaps will not let you
select a higher sensor resolution if you don't include it, as there will be
no intersection. This is a mechanism you'll find in GStreamer and PipeWire,
which depend on capabilities that stay close to the mathematical definition
of sets (with their own grain of salt).
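
To make that concrete, a minimal sketch with the caps format from this patch
(mode sizes are made up):

	/* Illustrative: intersect the sensor modes usable by two streams. */
	g_autoptr(GstCaps) modes_a = gst_caps_from_string(
		"sensor/mode,width=4056,height=3040;"
		"sensor/mode,width=2028,height=1520");
	g_autoptr(GstCaps) modes_b = gst_caps_from_string(
		"sensor/mode,width=4056,height=3040");
	g_autoptr(GstCaps) common = gst_caps_intersect(modes_a, modes_b);
	/* If 4056x3040 were not listed for the first stream, 'common' would
	 * be empty and the larger sensor mode could never be selected. */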

> - if the StreamConfiguration is for a !Raw role, should RAW
>   pixelformats be listed in the StreamFormats?

I've been trying to answer this one, but I realize I don't know if I
understand the full extent of the question. Is the Raw role going away? If
yes, then we'll have to offer the raw formats in all the other roles. But we
then lose a bit, as the Raw role has a special meaning and can greatly help
the pipeline when configuring (a Raw role configuration has precedence over
all the others, making things a little simpler when fixating the
configuration). I think we have to decide, but as of today's thinking, the
Raw role could remain; it's just that its configuration would be 1:1 with the
sensor configuration.

> 
> There are arguments for both cases, but I guess the big picture
> question is about how an application should expect to retrieve the
> full camera capabilities:
> - Does an application expect to receive only the available formats for
>   the role it has asked a configuration for?
> - Does an application expect to receive all the possible formats a
>   camera can produce regardless of the size/pixelformat of the
>   StreamConfiguration that has been generated?
> 
> As a concrete example, currently the RkISP1 pipeline handler lists a
> RAW StreamFormat for all configurations, regardless of the role. I had
> a patch to "fix" this but I've been pointed to the fact that the API was
> not very well specified.
> 
> What would your expectations be in this case?

That is interesting; today we have a bit of both. The most basic applications
already need to match two roles (Viewfinder + X). So in terms of sensor
configuration, without going into slow validation loops, they have to
correlate the information across the roles.

So overall, I think my preference would be something structured like this:

Role1:
  + format/size
	- Sensor Config1
	- Sensor Config2
	- Sensor Config3
  + format/size
	- Sensor Config2
Role2:
  ...

The stream configuration as defined today seems well suited. Keeping the
fixated frameDuration away from it seems unavoidable: there are just too many
variables involved in fixating the frameDuration, on top of which applications
are much more flexible than they pretend. As an example, when I see:

   libcamerasrc ! video/x-raw,framerate=60/1 ! ...

I see a very naive application. I'm very doubtful that 121/2 or 119/2 would
not have done the job equally well, and even 50/1 would likely be fine for
most use cases. In GStreamer, you can (and should) use ranges when setting
filters:

  libcamerasrc ! video/x-raw,framerate=[40/1,70/1] ! ...

If you have a strong preference for 60, you should hint at that this way
(note, as I said in the review, the newly added caps negotiation is not
working yet for that):

  libcamerasrc ! video/x-raw,framerate=60/1;video/x-raw,framerate=[40/1,70/1] ! ...

libcamerasrc is then free to read the first structure, save all its fixed
values as the "default", and later use a targeted fixation function
(gst_structure_fixate_field_nearest_fraction()). This gets complex quickly,
so no one ever implements it ahead of time, but it's all there and well
defined, and quite representative of how far an application can go to do the
right thing.
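
A minimal sketch of that targeted fixation (caps string used for brevity):

	/* Illustrative: fixate a framerate range towards a preferred value. */
	GstCaps *caps = gst_caps_from_string("video/x-raw,framerate=[40/1,70/1]");
	GstStructure *s = gst_caps_get_structure(caps, 0);
	/* Picks the value within the range nearest to 60/1, here 60/1 itself. */
	gst_structure_fixate_field_nearest_fraction(s, "framerate", 60, 1);
	gst_caps_unref(caps);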

Whether done by the application or the pipeline, the sensor configuration
would then be selected by collecting the sensor configurations of every
stream's fixated StreamConfiguration, intersecting them, and finally scoring
the remaining ones to pick the best.

Hope this quick GstCaps intro can help,
Nicolas

Jacopo Mondi March 27, 2023, 3:19 p.m. UTC | #5
Hi Nicolas,

On Mon, Mar 27, 2023 at 09:40:57AM -0400, Nicolas Dufresne wrote:
> Le lundi 27 mars 2023 à 11:21 +0200, Jacopo Mondi a écrit :
> > Hi Nicolas,
> >
> > On Fri, Mar 24, 2023 at 02:12:45PM -0400, Nicolas Dufresne via libcamera-devel wrote:
> > > From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > >
> > > This add support for selecting the sensor mode. A new read-only
> > > property called sensor-modes is filled when the element reaches
> > > READY state. It contains the list of all available sensor modes,
> > > including the supported framerate range. This is exposed as GstCaps
> > > in the form of sensor/mode,width=X,height=Y,format=Y,framerate=[...].
> > > The format string matches the libcamera format string representation.
> > >
> > > The application can then select a mode using the read/write sensor-mode
> > > control. The selected mode is also a caps, it will be intersected with
> > > the supported mode and the "best" match will be picked. This allows
> > > application to use simple filter when they want to pick a mode for lets
> > > say a specific framerate (e.g. sensor/mode,framerate=60/1).
> > >
> > > Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > ---
> > >  src/gstreamer/gstlibcamera-utils.cpp |   4 +
> > >  src/gstreamer/gstlibcamerasrc.cpp    | 110 ++++++++++++++++++++++++++-
> > >  2 files changed, 112 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/src/gstreamer/gstlibcamera-utils.cpp b/src/gstreamer/gstlibcamera-utils.cpp
> > > index 750ec351..c8a8df09 100644
> > > --- a/src/gstreamer/gstlibcamera-utils.cpp
> > > +++ b/src/gstreamer/gstlibcamera-utils.cpp
> > > @@ -416,6 +416,10 @@ gst_libcamera_configure_stream_from_caps(StreamConfiguration &stream_cfg,
> > >  		stream_cfg.pixelFormat = gst_format_to_pixel_format(gst_format);
> > >  	} else if (gst_structure_has_name(s, "image/jpeg")) {
> > >  		stream_cfg.pixelFormat = formats::MJPEG;
> > > +	} else if (gst_structure_has_name(s, "sensor/mode")) {
> > > +		gst_structure_fixate_field(s, "format");
> > > +		const gchar *format = gst_structure_get_string(s, "format");
> > > +		stream_cfg.pixelFormat = PixelFormat::fromString(format);
> > >  	} else {
> > >  		g_critical("Unsupported media type: %s", gst_structure_get_name(s));
> > >  	}
> > > diff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp
> > > index a10cbd4f..2f05a03f 100644
> > > --- a/src/gstreamer/gstlibcamerasrc.cpp
> > > +++ b/src/gstreamer/gstlibcamerasrc.cpp
> > > @@ -147,6 +147,9 @@ struct _GstLibcameraSrc {
> > >
> > >  	gchar *camera_name;
> > >
> > > +	GstCaps *sensor_modes;
> > > +	GstCaps *sensor_mode;
> > > +
> > >  	GstLibcameraSrcState *state;
> > >  	GstLibcameraAllocator *allocator;
> > >  	GstFlowCombiner *flow_combiner;
> > > @@ -154,7 +157,10 @@ struct _GstLibcameraSrc {
> > >
> > >  enum {
> > >  	PROP_0,
> > > -	PROP_CAMERA_NAME
> > > +	PROP_CAMERA_NAME,
> > > +	PROP_SENSOR_MODES,
> > > +	PROP_SENSOR_MODE,
> > > +	PROP_LAST
> > >  };
> > >
> > >  G_DEFINE_TYPE_WITH_CODE(GstLibcameraSrc, gst_libcamera_src, GST_TYPE_ELEMENT,
> > > @@ -318,6 +324,61 @@ int GstLibcameraSrcState::processRequest()
> > >  	return err;
> > >  }
> > >
> > > +static GstCaps *
> > > +gst_libcamera_src_enumerate_sensor_modes(GstLibcameraSrc *self)
> > > +{
> > > +	GstCaps *modes = gst_caps_new_empty();
> > > +	GstLibcameraSrcState *state = self->state;
> > > +	auto config = state->cam_->generateConfiguration({ libcamera::StreamRole::Raw,
> > > +							   libcamera::StreamRole::VideoRecording });
> >
> > This clearly shows our API falls short, as the presence of the RAW
> > stream, and the ability to capture it alongside other streams, is
> > conditional on the platform's capabilities. So this can't be
> > considered a sufficiently stable way of retrieving information about
> > the sensor's configuration. Not your fault, we currently don't have
> > anything better to offer.
> >
> > We currently have a way to report all the possible configurations for
> > a stream using the StreamFormat class. StreamFormat is currently
> > nothing but
> >
> > 	std::map<PixelFormat, std::vector<SizeRange>> formats_;
> >
> > which gets populated by the pipeline handler while creating
> > the StreamConfiguration items which will be part of the generated
> > CameraConfiguration.
> >
> > That's for the "Camera-provided information to applications" side.
> >
> > On the "application to configure the Camera" side, StreamConfiguration
> > is used to tell the library how a Stream should be configured.
> > These are the fields a StreamConfiguration describe:
> >
> > 	PixelFormat pixelFormat;
> > 	Size size;
> > 	unsigned int stride;
> > 	unsigned int frameSize;
> >
> > 	unsigned int bufferCount;
> >
> > 	std::optional<ColorSpace> colorSpace;
> >
> > With pixelformat, size, bufferCount and (optionally) colorSpace which
> > are expected to be set by the application.
> >
> > We have been discussing about adding to StreamConfiguration a
> > frameDuration since a long time. It is needed for some parts of the
> > Android HAL as well, and in general, as your patch shows, it is a
> > desirable feature. A similar reasoning can be made for the sensor's
> > configuration, even if this has received several push backs in the
> > past when RPi wanted something similar. Furthermore, I would be
> > tempted to consider the possible FrameDuration a property of the
> > sensor's configuration, but I presume that advanced use cases for
> > StillCapture chain to the canonical frame processing more stages, so
> > I presume it's better to have the FrameDurations separate from the
> > sensor configuration to allow describe additional delays there.
>
> Just a note here, what matters to application is not as much the final frame
> duration (framerate) but the maximum imposed by the select camera mode. An
> application will simply want to advertise lets say 60fps, if there is a mode
> that can theoretically achieve it. At the moment, it falls short, as even if
> there is a mode, if you aren't lucky enough, it may not work.
>

Correct. My point was about StillCapture potentially imposing additional
delays on top of the sensor's frame duration.

As far as I understand, advanced platforms might pass the frames
intended for still picture capture through additional processing blocks.
This takes time, and that latency should be reported somewhere.

> >
> > Summing it all up, we have StreamFormats to convey back to application
> > what a Stream can do, and StreamConfiguration to set the Stream
> > characteristics.
> >
> > Addressing the needs you have here would require:
> >
> > - Pipeline handlers to populate StreamFormats not only with
> >   <PixelFormat, vector<SizeRange>> but also with the possible sensor's
> >   configuration that can generated that mode and the possible frame
> >   durations
> >
> > - Application to populate a StreamConfiguration with the desired
> >   FrameDuration and sensor mode
> >
> > - Pipeline handler to validate that the desired sensor mode and
> >   durations are compatible with the stream characteristics.
> >   (as an example, if your pipeline can't upscale, you can't have a
> >   sensor mode with a size smaller than the desired output).
> >
> > This puts more complexity on the pipeline side, in both the generation
> > of configurations and in their validation, but would such an API
> > address your needs in your opinion ? Have you had any thoughts on how
> > you would like to have this handled ?
>
> Currently, stepping back form the implementation details, I identify two
> issues. 
>
> 1. Is the inability for the pipeline to do that right thing.
>
> As the pipeline is not aware of the desired frameDuration(s) at the time of
> validation, it is unable to do the right thing for an application that wants
> lets 60fps but the "best" mode does not cover it.
>
> 2. Is the lack of direct sensor mode enumeration
>
> This is what you describe, but my learning is that sensor mode description goes
> beyond format/width/height/frameDuration. For an application, it is imperative
> to know what portion will remain from the original field of view, and what is
> the initial field of view. I'm not sure I have collected all the details if any
> of this actually exist.

We have this information. The full pixel array size is a static
property of the Camera, and we also know the per-mode analogue crop
rectangle (the portion of the pixel array that gets processed). So we
can expose it alongside the sensor's output sizes, formats and
durations.

I don't quite see why applications would be interested in the sensor's
output format, even though I know very specialized applications (like
some of the RPi users have) want to pick the mode with, for
example, the highest bit depth.
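
As a rough illustration of what can already be retrieved today, here is
a minimal sketch with a hypothetical fovFraction() helper. It assumes
the pipeline handler populates properties::PixelArraySize and
properties::ScalerCropMaximum, which varies per platform:

        #include <libcamera/libcamera.h>
        #include <memory>

        using namespace libcamera;

        /* Fraction of the full pixel array processed by the current mode,
         * as a crude field-of-view estimate. Returns 1.0 when the
         * properties are not available. */
        static double fovFraction(const std::shared_ptr<Camera> &cam)
        {
                const ControlList &props = cam->properties();
                const auto array = props.get(properties::PixelArraySize);
                const auto crop = props.get(properties::ScalerCropMaximum);

                if (!array || !crop || !array->width || !array->height)
                        return 1.0;

                return (static_cast<double>(crop->width) * crop->height) /
                       (static_cast<double>(array->width) * array->height);
        }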

>
> So, if I read your proposal well, you suggest to add all supported camera modes
> to each of the enumrated format/size. As the camera configuration becomes a
> thing, we then have the ability to introduce more optional metadata there to
> solve 2. I think that could work, as there is certainly a strong correlation
> between the format/size and the available modes. Making a list of modes like
> libcamera-apps do would be more difficult though (apps have to deal with
> duplicates). Is that a concern ? (its not from my point of view, and we can use
> immutable shared pointer in C++ to do this at no cost, we could even define that
> pointer comparison is enough to match duplicates).
>

Duplications (when it comes to efficiency) are not my primary concern
as generateConfiguration() is not really in any hot path.

My struggle is with dependencies between Streams.

Let's take a practical (simplified) example that only considers the
sensor output size and the minimum duration a mode can achieve

Your sensor can do the following modes

        full res:       { 4144x3136, 66ms }
        2x2 binned      { 2072x1568, 16ms }

        - generateConfiguration({ Viewfinder });

          Easy, you can get 1080p from both modes and application
          select the one they prefer looking at durations/FOV

        - generateConfiguration({ Viewfinder, Still });

          Still wants full resolution, so you can't use the fastest
          { 2072x1568 } mode. As a result we can only associate
          { 4144x3136 } to both Roles. Your camera now runs at 16FPS
          (plus Still can have additional delays, as explained above,
          but they only apply when the Still stream is part of the Request).

My point here is that information like the duration depends on the overall
CameraConfiguration and not on a single stream's capabilities. In
other words, it cannot be estimated in advance, only after a
configuration has been applied (or validated) by the Camera.
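
To make that concrete, here is a minimal sketch using the same calls
this patch relies on; printDurationLimits() is a hypothetical helper and
assumes the camera has already been acquired:

        #include <libcamera/libcamera.h>
        #include <cinttypes>
        #include <cstdio>
        #include <memory>
        #include <vector>

        using namespace libcamera;

        /* The reported limits depend on the full set of roles: the same
         * camera may report ~16ms limits for { Viewfinder } alone and
         * ~66ms once Still forces the full-resolution mode. */
        static void printDurationLimits(const std::shared_ptr<Camera> &cam,
                                        const std::vector<StreamRole> &roles)
        {
                auto config = cam->generateConfiguration(roles);
                if (!config || config->validate() == CameraConfiguration::Invalid)
                        return;

                if (cam->configure(config.get()))
                        return;

                const auto it = cam->controls().find(&controls::FrameDurationLimits);
                if (it == cam->controls().end())
                        return;

                /* Limits are expressed in microseconds. */
                printf("duration limits: %" PRId64 " - %" PRId64 " us\n",
                       it->second.min().get<int64_t>(),
                       it->second.max().get<int64_t>());
        }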

As a comparison, Android lists the per-stream information (format and
duration) as if each stream were considered in isolation. It's up to applications
to understand that if they configure { Viewfinder, Still } the
resulting duration will be max(Viewfinder.duration, Still.duration).

This directly connects to the question below of what StreamFormats
should report: all the modes a role can do as if it were considered in
isolation, or should they be tied to the combination of the requested
streams ?

Honestly, generateConfiguration() is handy to make an application's life
easier when it wants some pre-cooked configuration. But for anything
more than that I would be tempted to propose

        StreamFormats Camera::streamsFormats()
        StreamFormats Camera::stallingStreamsFormats()
        StreamFormats Camera::rawStreamsFormats()

        (Yes, that's the same model Android has)

The argument against this was that applications would then need to know how to
combine the Streams in order to form a valid CameraConfiguration, and
this requires knowing the platform capabilities (can the Camera do 2
YUV streams ? Can it do 2 YUV + RAW ? etc).

Again for a comparison, Android has "hw levels" that describe what a
platform can do, so if your platform is "FULL" you are guaranteed that
you can combine up to a certain number of YUV + RAW, if it's "LIMITED"
you can't have more than 1 YUV + RAW etc etc so that apps know what
they can expect.

> >
> > That's really an open question for everyone, as I guess this part is
> > becoming critical as this patch shows and the workaround we pushed RPi
> > to is really not generic enough.
>
> Agreed, and looking for feedback. I'm be happy to help with the subject. I know
> this patch was only making an issue more visible, but I'm also sharing it as it
> may temporally be useful to some users (as a downstream patch).
>
> >
> > One more point here below
> >
> > Thanks
> >    j
> >
> > -------------------------------------------------------------------------------
> > Slightly unrelated point, but while you're here I would like to pick
> > your brain on the issue: the way StreamFormat are currently created is
> > slightly under-specified. The documentation goes in great length in
> > describing how the sizes associated with PixelFormat can be inspected
> > [1] but it only superficially describe how a pipeline handler should
> > populate the formats associated with a stream configuration [2]. I'll
> > make two examples:
> > - if the StreamConfiguration is for a 1080p Viewfinder stream should
> >   the associated StreamFormats list resolutions > 1080p ?
>
> If you have scaling in your ISP, I think it should. I notice that feeding the
> ISP with higher resolution often lead to a cleaner image. An application can
> then decide to focus on reducing the bandwidth (and possibly the battery life as
> a side effect) by matching the resolution.

I was not talking about the sensor's mode, but about the Stream output size.

IOW, if you ask for { Viewfinder } you (generally) get back a stream
config with size (1920x1080). Should the StreamFormats associated with
such a config list output formats larger than 1080p ?

>
> The other reason is that your viewFinder will likely match the display
> resolution. If you do viewFinder + still pictures, and you don't want the
> viewFinder going blank while taking a picture, you will have no choice but to
> set the sensor at the desired still picture resolution.
>

Can't disagree

> Finally, if you don't expose the higher resolution, a generic caps mechanism
> like GstCaps can't be used. When you have multiple streams, you will want to
> intersect the sensor configuration for every streams in order to find the
> remaining available sensor modes you can pick from. GstCaps will not allow you
> to select higher sensor resolution if you don't include them, as there will be
> no intersection. This is a mechanism you'll find in GStreamer and Pipewire,
> which depends on having capability that are close to the mathematical
> definitions of sets (with its own salt).
>

I guess the main issue is that gst (like many other potential
users) wants to get a general view of what the Camera can do, and
then decide how to combine the streams. We have an API that tells you how
the camera will behave by considering the overall configuration of streams.

Back to the above example, if you ask for { Viewfinder, Still } you get a
16 FPS camera.

> > - if the StreamConfiguration is for a !Raw role, should RAW
> >   pixelformats be listed in the StreamFormats ?
>
> I've been trying to answer this one, but I realize I don't know if I understand
> the full extent of the question. Is raw role going away ? If yes, then we'll

No no, it's not :)

> have to offer the raw formats into all the other roles. But we then loose a bit,
> as the Raw role have special meaning, and can greatly help the pipeline when
> configuring (a raw role configuration had precedence over all the other, making
> things a little simpler when fixating the configuration). I think we have to
> decide, but as of my today's thinking, the raw roles could remain, its just that
> the configuration will just be 1:1 with the sensor configuration.
>

Sorry this wasn't clear. To me this is tightly related to how
StreamFormats should be interpreted. Should they report what the
camera can do in the current configuration, or expose the general
capabilities of the Camera ? Today we do a bit of both. The example
with RAW was that if you ask

        { Viewfinder, Still }

Currently on RkISP1 you also get back a few StreamFormat for the RAW
configuration:

        { SBGGR10, 4144x3136 }
        { SBGGR10, 2072x1568 }

even if you haven't asked for them.

This seems to highlight that StreamFormats should report all the
capabilities of the Camera, including formats you can't (or shouldn't)
use for a role, which seems just confusing and defeats the purpose of
Roles in the first place.

> >
> > There are arguments for both cases, but I guess the big picture
> > question is about how an application should expect to retrieve the
> > full camera capabilities:
> > - Does an application expect to receive only the available formats for
> >   the role it has asked a configuration for ?
> > - Does an application expect to receive all the possible formats a
> >   camera can produce regardless of the size/pixelformat of the
> >   StreamConfiguration that has been generated ?
> >
> > As a concrete example, currently the RkISP1 pipeline handler list a
> > RAW StreamFormat for all configurations, regardless of the role. I had
> > a patch to "fix" this but I've been pointed to the fact the API was
> > not very well specified.
> >
> > What would your expectations be in this case ?
>
> That is interesting, so today we have a bit of both. The most basic applications
> needs to match two roles already (ViewFinder + X). So in term of sensor
> configuration, without going into slow validation loops, have to cross over the
> information across the roles.
>
> So overall, I think my preference would be something structured like this:
>
> Role1:
>   + format/size
> 	- Sensor Config1
> 	- Sensor Config2
> 	- Sensor Config3
>   + format/size
> 	- Sensor Config2
> Role2:
>   ...
>
> The stream configuration as defined today seems well suited. Keeping the fixated
> frameDuration away from it seems unavoidable. There is just too many variable

What do you mean by "fixated frameDuration" ?

> toward the fixation of the frameDuration, on top of which, applications are much
> more flexible then they pretend. As an example, when I see:
>
>    libcamerasrc ! video/x-raw,framerate=60/1 ! ...
>
> I see a very naive application. I'm very doubtful that 121/2 or 119/2 would not
> have done the equally the job. And even 50/1 would likely be fine for most use
> cases. In GStreamer, you can (and should) use ranges when setting filters.
>
>   libcamerasrc ! video/x-raw,framerate=[40/1,70/1] ! ...
>
> If you have a strong preference for 60, you should hint that this way (note, as
> I said in the review, the newly added caps negotiation is not working yet for
> that):
>
>   libcamerasrc ! video/x-raw,framerate=60/1;video/x-raw,framerate=[40/1,70/1] ! ...
>
> And libcamerasrc is free to read the first structure, and same all the fixed
> values as the "default", and later use oriented fixation function
> (gst_structure_fixate_nearest_fraction()). This gets complex quickly, so no one
> ever implement this ahead of time, but its all there and well defined. And quite
> representative of how far an application can go to do the right thing.
>
> How the sensor configuration would go, done by application or pipeline, would be
> to collect all sensorConfigurations for all the streams fixated
> streamConfiguration. Intersect, and finally score the remaining to pick the
> best.

Do I read it right that you then expect to receive all the information
about what streams can be produced by a Camera as if they were considered
in isolation, and to combine them appropriately yourself at the time of
constructing a CameraConfiguration before starting capture ?

Thanks for sticking with this long conversation

>
> Hope this quick GstCaps intro can help,
> Nicolas
>
> >
> > [1] https://libcamera.org/api-html/classlibcamera_1_1StreamFormats.html
> > [2] https://libcamera.org/api-html/structlibcamera_1_1StreamConfiguration.html#a37725e6b9e179ca80be0e57ed97857d3
> > -------------------------------------------------------------------------------
> >
> >
> > > +	if (config == nullptr) {
> > > +		GST_DEBUG_OBJECT(self, "Sensor mode selection is not supported, skipping enumeration.");
> > > +		return modes;
> > > +	}
> > > +
> > > +	const libcamera::StreamFormats &formats = config->at(0).formats();
> > > +
> > > +	for (const auto &pixfmt : formats.pixelformats()) {
> > > +		for (const auto &size : formats.sizes(pixfmt)) {
> > > +			config->at(0).size = size;
> > > +			config->at(0).pixelFormat = pixfmt;
> > > +
> > > +			if (config->validate() == CameraConfiguration::Invalid)
> > > +				continue;
> > > +
> > > +			if (state->cam_->configure(config.get()))
> > > +				continue;
> > > +
> > > +			auto fd_ctrl = state->cam_->controls().find(&controls::FrameDurationLimits);
> > > +			if (fd_ctrl == state->cam_->controls().end())
> > > +				continue;
> > > +
> > > +			int minrate_num, minrate_denom;
> > > +			int maxrate_num, maxrate_denom;
> > > +			double min_framerate = gst_util_guint64_to_gdouble(1.0e6) /
> > > +					       gst_util_guint64_to_gdouble(fd_ctrl->second.max().get<int64_t>());
> > > +			double max_framerate = gst_util_guint64_to_gdouble(1.0e6) /
> > > +					       gst_util_guint64_to_gdouble(fd_ctrl->second.min().get<int64_t>());
> > > +			gst_util_double_to_fraction(min_framerate, &minrate_num, &minrate_denom);
> > > +			gst_util_double_to_fraction(max_framerate, &maxrate_num, &maxrate_denom);
> > > +
> > > +			GstStructure *s = gst_structure_new("sensor/mode",
> > > +							    "format", G_TYPE_STRING, pixfmt.toString().c_str(),
> > > +							    "width", G_TYPE_INT, size.width,
> > > +							    "height", G_TYPE_INT, size.height,
> > > +							    "framerate", GST_TYPE_FRACTION_RANGE,
> > > +							    minrate_num, minrate_denom,
> > > +							    maxrate_num, maxrate_denom,
> > > +							    nullptr);
> > > +			gst_caps_append_structure(modes, s);
> > > +		}
> > > +	}
> > > +
> > > +	GST_DEBUG_OBJECT(self, "Camera sensor modes: %" GST_PTR_FORMAT, modes);
> > > +
> > > +	return modes;
> > > +}
> > > +
> > >  static bool
> > >  gst_libcamera_src_open(GstLibcameraSrc *self)
> > >  {
> > > @@ -375,6 +436,7 @@ gst_libcamera_src_open(GstLibcameraSrc *self)
> > >  	/* No need to lock here, we didn't start our threads yet. */
> > >  	self->state->cm_ = cm;
> > >  	self->state->cam_ = cam;
> > > +	self->sensor_modes = gst_libcamera_src_enumerate_sensor_modes(self);
> > >
> > >  	return true;
> > >  }
> > > @@ -462,6 +524,7 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > >  	GstLibcameraSrcState *state = self->state;
> > >  	GstFlowReturn flow_ret = GST_FLOW_OK;
> > >  	gint ret;
> > > +	g_autoptr(GstCaps) sensor_mode = nullptr;
> > >
> > >  	g_autoptr(GstStructure) element_caps = gst_structure_new_empty("caps");
> > >
> > > @@ -481,6 +544,16 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > >  		roles.push_back(gst_libcamera_pad_get_role(srcpad));
> > >  	}
> > >
> > > +	if (!gst_caps_is_any(self->sensor_mode)) {
> > > +		sensor_mode = gst_caps_intersect(self->sensor_mode, self->sensor_modes);
> > > +		if (!gst_caps_is_empty(sensor_mode)) {
> > > +			roles.push_back(libcamera::StreamRole::Raw);
> > > +		} else {
> > > +			GST_WARNING_OBJECT(self, "No sensor mode matching the selection, ignoring.");
> > > +			gst_clear_caps(&sensor_mode);
> > > +		}
> > > +	}
> > > +
> > >  	/* Generate the stream configurations, there should be one per pad. */
> > >  	state->config_ = state->cam_->generateConfiguration(roles);
> > >  	if (state->config_ == nullptr) {
> > > @@ -490,7 +563,7 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > >  		gst_task_stop(task);
> > >  		return;
> > >  	}
> > > -	g_assert(state->config_->size() == state->srcpads_.size());
> > > +	g_assert(state->config_->size() == state->srcpads_.size() + (!!sensor_mode));
> > >
> > >  	for (gsize i = 0; i < state->srcpads_.size(); i++) {
> > >  		GstPad *srcpad = state->srcpads_[i];
> > > @@ -510,6 +583,12 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > >  		gst_libcamera_get_framerate_from_caps(caps, element_caps);
> > >  	}
> > >
> > > +	if (sensor_mode) {
> > > +		StreamConfiguration &stream_cfg = state->config_->at(state->srcpads_.size());
> > > +		g_assert(gst_caps_is_writable(sensor_mode));
> > > +		gst_libcamera_configure_stream_from_caps(stream_cfg, sensor_mode);
> > > +	}
> > > +
> > >  	if (flow_ret != GST_FLOW_OK)
> > >  		goto done;
> > >
> > > @@ -624,6 +703,7 @@ gst_libcamera_src_task_leave([[maybe_unused]] GstTask *task,
> > >  	g_clear_object(&self->allocator);
> > >  	g_clear_pointer(&self->flow_combiner,
> > >  			(GDestroyNotify)gst_flow_combiner_free);
> > > +	gst_clear_caps(&self->sensor_modes);
> > >  }
> > >
> > >  static void
> > > @@ -659,6 +739,10 @@ gst_libcamera_src_set_property(GObject *object, guint prop_id,
> > >  		g_free(self->camera_name);
> > >  		self->camera_name = g_value_dup_string(value);
> > >  		break;
> > > +	case PROP_SENSOR_MODE:
> > > +		gst_clear_caps(&self->sensor_mode);
> > > +		self->sensor_mode = GST_CAPS(g_value_dup_boxed(value));
> > > +		break;
> > >  	default:
> > >  		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
> > >  		break;
> > > @@ -676,6 +760,12 @@ gst_libcamera_src_get_property(GObject *object, guint prop_id, GValue *value,
> > >  	case PROP_CAMERA_NAME:
> > >  		g_value_set_string(value, self->camera_name);
> > >  		break;
> > > +	case PROP_SENSOR_MODES:
> > > +		g_value_set_boxed(value, self->sensor_modes);
> > > +		break;
> > > +	case PROP_SENSOR_MODE:
> > > +		g_value_set_boxed(value, self->sensor_mode);
> > > +		break;
> > >  	default:
> > >  		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
> > >  		break;
> > > @@ -763,6 +853,7 @@ gst_libcamera_src_init(GstLibcameraSrc *self)
> > >  	/* C-style friend. */
> > >  	state->src_ = self;
> > >  	self->state = state;
> > > +	self->sensor_mode = gst_caps_new_any();
> > >  }
> > >
> > >  static GstPad *
> > > @@ -844,4 +935,19 @@ gst_libcamera_src_class_init(GstLibcameraSrcClass *klass)
> > >  							     | G_PARAM_READWRITE
> > >  							     | G_PARAM_STATIC_STRINGS));
> > >  	g_object_class_install_property(object_class, PROP_CAMERA_NAME, spec);
> > > +
> > > +	spec = g_param_spec_boxed("sensor-modes", "Sensor Modes",
> > > +				  "GstCaps representing available sensor modes.",
> > > +				  GST_TYPE_CAPS,
> > > +				  (GParamFlags)(G_PARAM_READABLE
> > > +					        | G_PARAM_STATIC_STRINGS));
> > > +	g_object_class_install_property(object_class, PROP_SENSOR_MODES, spec);
> > > +
> > > +	spec = g_param_spec_boxed("sensor-mode", "Sensor Mode",
> > > +				  "GstCaps representing selected sensor mode.",
> > > +				  GST_TYPE_CAPS,
> > > +				  (GParamFlags)(GST_PARAM_MUTABLE_READY
> > > +					        | G_PARAM_READWRITE
> > > +					        | G_PARAM_STATIC_STRINGS));
> > > +	g_object_class_install_property(object_class, PROP_SENSOR_MODE, spec);
> > >  }
> > > --
> > > 2.39.2
> > >
>
Nicolas Dufresne March 28, 2023, 7:23 p.m. UTC | #6
Le lundi 27 mars 2023 à 17:19 +0200, Jacopo Mondi via libcamera-devel a écrit :
> Hi Nicolas,
> 
> On Mon, Mar 27, 2023 at 09:40:57AM -0400, Nicolas Dufresne wrote:
> > Le lundi 27 mars 2023 à 11:21 +0200, Jacopo Mondi a écrit :
> > > Hi Nicolas,
> > > 
> > > On Fri, Mar 24, 2023 at 02:12:45PM -0400, Nicolas Dufresne via libcamera-devel wrote:
> > > > From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > > 
> > > > This add support for selecting the sensor mode. A new read-only
> > > > property called sensor-modes is filled when the element reaches
> > > > READY state. It contains the list of all available sensor modes,
> > > > including the supported framerate range. This is exposed as GstCaps
> > > > in the form of sensor/mode,width=X,height=Y,format=Y,framerate=[...].
> > > > The format string matches the libcamera format string representation.
> > > > 
> > > > The application can then select a mode using the read/write sensor-mode
> > > > control. The selected mode is also a caps, it will be intersected with
> > > > the supported mode and the "best" match will be picked. This allows
> > > > application to use simple filter when they want to pick a mode for lets
> > > > say a specific framerate (e.g. sensor/mode,framerate=60/1).
> > > > 
> > > > Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > > ---
> > > >  src/gstreamer/gstlibcamera-utils.cpp |   4 +
> > > >  src/gstreamer/gstlibcamerasrc.cpp    | 110 ++++++++++++++++++++++++++-
> > > >  2 files changed, 112 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/src/gstreamer/gstlibcamera-utils.cpp b/src/gstreamer/gstlibcamera-utils.cpp
> > > > index 750ec351..c8a8df09 100644
> > > > --- a/src/gstreamer/gstlibcamera-utils.cpp
> > > > +++ b/src/gstreamer/gstlibcamera-utils.cpp
> > > > @@ -416,6 +416,10 @@ gst_libcamera_configure_stream_from_caps(StreamConfiguration &stream_cfg,
> > > >  		stream_cfg.pixelFormat = gst_format_to_pixel_format(gst_format);
> > > >  	} else if (gst_structure_has_name(s, "image/jpeg")) {
> > > >  		stream_cfg.pixelFormat = formats::MJPEG;
> > > > +	} else if (gst_structure_has_name(s, "sensor/mode")) {
> > > > +		gst_structure_fixate_field(s, "format");
> > > > +		const gchar *format = gst_structure_get_string(s, "format");
> > > > +		stream_cfg.pixelFormat = PixelFormat::fromString(format);
> > > >  	} else {
> > > >  		g_critical("Unsupported media type: %s", gst_structure_get_name(s));
> > > >  	}
> > > > diff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp
> > > > index a10cbd4f..2f05a03f 100644
> > > > --- a/src/gstreamer/gstlibcamerasrc.cpp
> > > > +++ b/src/gstreamer/gstlibcamerasrc.cpp
> > > > @@ -147,6 +147,9 @@ struct _GstLibcameraSrc {
> > > > 
> > > >  	gchar *camera_name;
> > > > 
> > > > +	GstCaps *sensor_modes;
> > > > +	GstCaps *sensor_mode;
> > > > +
> > > >  	GstLibcameraSrcState *state;
> > > >  	GstLibcameraAllocator *allocator;
> > > >  	GstFlowCombiner *flow_combiner;
> > > > @@ -154,7 +157,10 @@ struct _GstLibcameraSrc {
> > > > 
> > > >  enum {
> > > >  	PROP_0,
> > > > -	PROP_CAMERA_NAME
> > > > +	PROP_CAMERA_NAME,
> > > > +	PROP_SENSOR_MODES,
> > > > +	PROP_SENSOR_MODE,
> > > > +	PROP_LAST
> > > >  };
> > > > 
> > > >  G_DEFINE_TYPE_WITH_CODE(GstLibcameraSrc, gst_libcamera_src, GST_TYPE_ELEMENT,
> > > > @@ -318,6 +324,61 @@ int GstLibcameraSrcState::processRequest()
> > > >  	return err;
> > > >  }
> > > > 
> > > > +static GstCaps *
> > > > +gst_libcamera_src_enumerate_sensor_modes(GstLibcameraSrc *self)
> > > > +{
> > > > +	GstCaps *modes = gst_caps_new_empty();
> > > > +	GstLibcameraSrcState *state = self->state;
> > > > +	auto config = state->cam_->generateConfiguration({ libcamera::StreamRole::Raw,
> > > > +							   libcamera::StreamRole::VideoRecording });
> > > 
> > > This clearly shows our API falls short, as the presence of the RAW
> > > stream, and the ability to capture it alongside other streams, is
> > > conditional on the platform's capabilities. So this can't be
> > > considered a sufficiently stable way of retrieving information about
> > > the sensor's configuration. Not your fault, we currently don't have
> > > anything better to offer.
> > > 
> > > We currently have a way to report all the possible configurations for
> > > a stream using the StreamFormat class. StreamFormat is currently
> > > nothing but
> > > 
> > > 	std::map<PixelFormat, std::vector<SizeRange>> formats_;
> > > 
> > > which gets populated by the pipeline handler while creating
> > > the StreamConfiguration items which will be part of the generated
> > > CameraConfiguration.
> > > 
> > > That's for the "Camera-provided information to applications" side.
> > > 
> > > On the "application to configure the Camera" side, StreamConfiguration
> > > is used to tell the library how a Stream should be configured.
> > > These are the fields a StreamConfiguration describe:
> > > 
> > > 	PixelFormat pixelFormat;
> > > 	Size size;
> > > 	unsigned int stride;
> > > 	unsigned int frameSize;
> > > 
> > > 	unsigned int bufferCount;
> > > 
> > > 	std::optional<ColorSpace> colorSpace;
> > > 
> > > With pixelformat, size, bufferCount and (optionally) colorSpace which
> > > are expected to be set by the application.
> > > 
> > > We have been discussing about adding to StreamConfiguration a
> > > frameDuration since a long time. It is needed for some parts of the
> > > Android HAL as well, and in general, as your patch shows, it is a
> > > desirable feature. A similar reasoning can be made for the sensor's
> > > configuration, even if this has received several push backs in the
> > > past when RPi wanted something similar. Furthermore, I would be
> > > tempted to consider the possible FrameDuration a property of the
> > > sensor's configuration, but I presume that advanced use cases for
> > > StillCapture chain to the canonical frame processing more stages, so
> > > I presume it's better to have the FrameDurations separate from the
> > > sensor configuration to allow describe additional delays there.
> > 
> > Just a note here, what matters to application is not as much the final frame
> > duration (framerate) but the maximum imposed by the select camera mode. An
> > application will simply want to advertise lets say 60fps, if there is a mode
> > that can theoretically achieve it. At the moment, it falls short, as even if
> > there is a mode, if you aren't lucky enough, it may not work.
> > 
> 
> Correct. My point was about StillCapture potentially imposing additional
> delays on top of the sensor's frame duration.
> 
> As far as I understand, advanced platforms might pass the frames
> intended for still picture capture through additional processing blocks.
> This takes time, and that latency should be reported somewhere.
> 
> > > 
> > > Summing it all up, we have StreamFormats to convey back to application
> > > what a Stream can do, and StreamConfiguration to set the Stream
> > > characteristics.
> > > 
> > > Addressing the needs you have here would require:
> > > 
> > > - Pipeline handlers to populate StreamFormats not only with
> > >   <PixelFormat, vector<SizeRange>> but also with the possible sensor's
> > >   configuration that can generated that mode and the possible frame
> > >   durations
> > > 
> > > - Application to populate a StreamConfiguration with the desired
> > >   FrameDuration and sensor mode
> > > 
> > > - Pipeline handler to validate that the desired sensor mode and
> > >   durations are compatible with the stream characteristics.
> > >   (as an example, if your pipeline can't upscale, you can't have a
> > >   sensor mode with a size smaller than the desired output).
> > > 
> > > This puts more complexity on the pipeline side, in both the generation
> > > of configurations and in their validation, but would such an API
> > > address your needs in your opinion ? Have you had any thoughts on how
> > > you would like to have this handled ?
> > 
> > Currently, stepping back form the implementation details, I identify two
> > issues. 
> > 
> > 1. Is the inability for the pipeline to do that right thing.
> > 
> > As the pipeline is not aware of the desired frameDuration(s) at the time of
> > validation, it is unable to do the right thing for an application that wants
> > lets 60fps but the "best" mode does not cover it.
> > 
> > 2. Is the lack of direct sensor mode enumeration
> > 
> > This is what you describe, but my learning is that sensor mode description goes
> > beyond format/width/height/frameDuration. For an application, it is imperative
> > to know what portion will remain from the original field of view, and what is
> > the initial field of view. I'm not sure I have collected all the details if any
> > of this actually exist.
> 
> We have this information. The full pixel array size is a static
> property of the Camera, and we also know the per-mode analogue crop
> rectangle (the portion of the pixel array that gets processed). So we
> can expose it alongside the sensor's output sizes, formats and
> durations.
> 
> I don't quite see why applications would be interested in the sensor's
> output format, even though I know very specialized applications (like
> some of the RPi users have) want to pick the mode with, for
> example, the highest bit depth.
> 
> > 
> > So, if I read your proposal well, you suggest to add all supported camera modes
> > to each of the enumrated format/size. As the camera configuration becomes a
> > thing, we then have the ability to introduce more optional metadata there to
> > solve 2. I think that could work, as there is certainly a strong correlation
> > between the format/size and the available modes. Making a list of modes like
> > libcamera-apps do would be more difficult though (apps have to deal with
> > duplicates). Is that a concern ? (its not from my point of view, and we can use
> > immutable shared pointer in C++ to do this at no cost, we could even define that
> > pointer comparison is enough to match duplicates).
> > 
> 
> Duplications (when it comes to efficiency) are not my primary concern
> as generateConfiguration() is not really in any hot path.
> 
> My struggle is with dependencies between Streams.
> 
> Let's take a practical (simplified) example that only considers the
> sensor output size and the minimum duration a mode can achieve
> 
> Your sensor can do the following modes
> 
>         full res:       { 4144x3136, 66ms }
>         2x2 binned      { 2072x1568, 16ms }
> 
>         - generateConfiguration({ Viewfinder });
> 
>           Easy, you can get 1080p from both modes and application
>           select the one they prefer looking at durations/FOV
> 
>         - generateConfiguration({ Viewfinder, Still });
> 
>           Still wants full resolution, so you can't use the fastest
>           { 2072x1568 } mode. As a result we can only associate
>           { 4144x3136 } to both Roles. Your camera now runs at 16FPS
>           (plus Still can have additional delays, as explained above,
>           but they only apply when the Still stream is part of the Request).
> 
> My point here is that information like the duration depends on the overall
> CameraConfiguration and not on a single stream's capabilities. In
> other words, it cannot be estimated in advance, only after a
> configuration has been applied (or validated) by the Camera.
> 
> As a comparison, Android lists the per-stream information (format and
> duration) as if each stream were considered in isolation. It's up to applications
> to understand that if they configure { Viewfinder, Still } the
> resulting duration will be max(Viewfinder.duration, Still.duration).
> 
> This directly connects to the question below of what StreamFormats
> should report: all the modes a role can do as if it were considered in
> isolation, or should they be tied to the combination of the requested
> streams ?
> 

My original idea is that for each StreamConfig, in isolation, we'd have N
SensorConfigs. The combination of a StreamConfig and a SensorConfig is what could
possibly offer a duration range. It makes no sense to me to give a range over
all the possible SensorConfigs, since in many cases they do not produce the same
stream (notably with different fields of view). Or at least, these should be
set up in a way that the duration range holds for a coherent group.

And then the intersection of StreamConfig+SensorConfig for one role and another
will truncate the duration range accordingly, and we should end up with the
closest possible approximation.
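
For illustration, a small sketch of that intersection idea with GstCaps;
the "sensor/mode" media type is the one from this patch, and the mode
values are made up:

        #include <gst/gst.h>

        int main(int argc, char **argv)
        {
                gst_init(&argc, &argv);

                /* Hypothetical per-stream constraints on the same sensor
                 * mode, each stream allowing a different framerate range. */
                GstCaps *a = gst_caps_from_string(
                        "sensor/mode,width=2072,height=1568,framerate=[1/1,120/1]");
                GstCaps *b = gst_caps_from_string(
                        "sensor/mode,width=2072,height=1568,framerate=[30/1,60/1]");

                /* The intersection truncates the framerate (duration)
                 * range to what both streams can sustain. */
                GstCaps *common = gst_caps_intersect(a, b);
                gst_print("%" GST_PTR_FORMAT "\n", common);

                gst_caps_unref(a);
                gst_caps_unref(b);
                gst_caps_unref(common);
                return 0;
        }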


> Honestly, generateConfiguration() is handy to make an application's life
> easier when it wants some pre-cooked configuration. But for anything
> more than that I would be tempted to propose
> 
>         StreamFormats Camera::streamsFormats()
>         StreamFormats Camera::stallingStreamsFormats()
>         StreamFormats Camera::rawStreamsFormats()
> 
>         (Yes, that's the same model Android has)
> 
> The argument against this was that applications would then need to know how to
> combine the Streams in order to form a valid CameraConfiguration, and
> this requires knowing the platform capabilities (can the Camera do 2
> YUV streams ? Can it do 2 YUV + RAW ? etc).
> 

I think camera apps will have use cases, and fallback use cases. They will try
and fall back; they don't really care about enumerating the capabilities because
there is too much complexity in generically adapting to the capabilities of
various cameras.

Apps are specialized pieces of software, and of course the trial chain will be
short and will fail if you use them with an inappropriate piece of equipment.

So in this regard, what Android exposes is right; the only improvement I see is
to place the raw stream formats (I guess this is not just formats but
configurations) into the list of other formats, because we know that streams will
need to share the same sensor/raw configuration (and this should be optional, so
we don't go populating raw formats for UVC cameras, where it does not really make
sense).

> Again for a comparison, Android has "hw levels" that describe what a
> platform can do, so if your platform is "FULL" you are guaranteed that
> you can combine up to a certain number of YUV + RAW, if it's "LIMITED"
> you can't have more than 1 YUV + RAW etc etc so that apps know what
> they can expect.
> 

So, a use case flag to allow avoiding trial and error, which is fine, but of
course the use cases are far more varied in the context of libcamera, so it
might be a challenge to maintain such a list. If we can express a little more,
we can keep that list down.

> 
> > > 
> > > That's really an open question for everyone, as I guess this part is
> > > becoming critical as this patch shows and the workaround we pushed RPi
> > > to is really not generic enough.
> > 
> > Agreed, and looking for feedback. I'm be happy to help with the subject. I know
> > this patch was only making an issue more visible, but I'm also sharing it as it
> > may temporally be useful to some users (as a downstream patch).
> > 
> > > 
> > > One more point here below
> > > 
> > > Thanks
> > >    j
> > > 
> > > -------------------------------------------------------------------------------
> > > Slightly unrelated point, but while you're here I would like to pick
> > > your brain on the issue: the way StreamFormat are currently created is
> > > slightly under-specified. The documentation goes in great length in
> > > describing how the sizes associated with PixelFormat can be inspected
> > > [1] but it only superficially describe how a pipeline handler should
> > > populate the formats associated with a stream configuration [2]. I'll
> > > make two examples:
> > > - if the StreamConfiguration is for a 1080p Viewfinder stream should
> > >   the associated StreamFormats list resolutions > 1080p ?
> > 
> > If you have scaling in your ISP, I think it should. I notice that feeding the
> > ISP with higher resolution often lead to a cleaner image. An application can
> > then decide to focus on reducing the bandwidth (and possibly the battery life as
> > a side effect) by matching the resolution.
> 
> I was not talking about the sensor's mode, but about the Stream output size.
> 
> IOW, if you ask for { Viewfinder } you (generally) get back a stream
> config with size (1920x1080). Should the StreamFormats associated with
> such a config list output formats larger than 1080p ?
> 


No, but there are multiple SensorConfigs (or Raw configs in our current state)
that can achieve this 1920x1080. And all these sensor configs will arguably
produce different streams. This is what I had in mind when special-casing the
sensor configuration. It is highly relevant to the use case.

> 
> 
> > 
> > The other reason is that your viewFinder will likely match the display
> > resolution. If you do viewFinder + still pictures, and you don't want the
> > viewFinder going blank while taking a picture, you will have no choice but to
> > set the sensor at the desired still picture resolution.
> > 
> 
> Can't disagree
> 
> > Finally, if you don't expose the higher resolution, a generic caps mechanism
> > like GstCaps can't be used. When you have multiple streams, you will want to
> > intersect the sensor configuration for every streams in order to find the
> > remaining available sensor modes you can pick from. GstCaps will not allow you
> > to select higher sensor resolution if you don't include them, as there will be
> > no intersection. This is a mechanism you'll find in GStreamer and Pipewire,
> > which depends on having capability that are close to the mathematical
> > definitions of sets (with its own salt).
> > 
> 
> I guess the main issue is that gst (like many other potential
> users) wants to get a general view of what the Camera can do, and
> then decide how to combine the streams. We have an API that tells you how
> the camera will behave by considering the overall configuration of streams.
> 
> Back to the above example, if you ask for { Viewfinder, Still } you get a
> 16 FPS camera.
> 
> > > - if the StreamConfiguration is for a !Raw role, should RAW
> > >   pixelformats be listed in the StreamFormats ?
> > 
> > I've been trying to answer this one, but I realize I don't know if I understand
> > the full extent of the question. Is raw role going away ? If yes, then we'll
> 
> No no, it's not :)
> 
> > have to offer the raw formats into all the other roles. But we then loose a bit,
> > as the Raw role have special meaning, and can greatly help the pipeline when
> > configuring (a raw role configuration had precedence over all the other, making
> > things a little simpler when fixating the configuration). I think we have to
> > decide, but as of my today's thinking, the raw roles could remain, its just that
> > the configuration will just be 1:1 with the sensor configuration.
> > 
> 
> Sorry this wasn't clear. To me this is tightly related to how
> StreamFormats should be interpreted. Should they report what the
> camera can do in the current configuration, or expose the general
> capabilities of the Camera ? Today we do a bit of both. The example
> with RAW was that if you ask
> 
>         { Viewfinder, Still }
> 
> Currently on RkISP1 you also get back a few StreamFormat for the RAW
> configuration:
> 
>         { SBGGR10, 4144x3136 }
>         { SBGGR10, 2072x1568 }
> 
> even if you haven't asked for them.
> 
> This seems to highlight that StreamFormats should report all the
> capabilities of the Camera, including formats you can't (or shouldn't)
> use for a role, which seems just confusing and defeats the purpose of
> Roles in the first place.

Yeah, I think the rkisp pipeline is wrong here, as you won't have any of the
features you'd get with the other formats if you pick them. It means RKISP
proposes random choices that have random capabilities, in disregard of the
roles. At the same time, can we say that ? It depends how the roles are truly
defined. But I think I can see how the RKISP interpretation of the roles is
unusable by applications.

It basically assumes that applications will know, since bayer is associated with
no processing, but what if you have a shiny new type of sensor that uses a
format commonly known as "normal" ?

> 
> > > 
> > > There are arguments for both cases, but I guess the big picture
> > > question is about how an application should expect to retrieve the
> > > full camera capabilities:
> > > - Does an application expect to receive only the available formats for
> > >   the role it has asked a configuration for ?
> > > - Does an application expect to receive all the possible formats a
> > >   camera can produce regardless of the size/pixelformat of the
> > >   StreamConfiguration that has been generated ?
> > > 
> > > As a concrete example, currently the RkISP1 pipeline handler list a
> > > RAW StreamFormat for all configurations, regardless of the role. I had
> > > a patch to "fix" this but I've been pointed to the fact the API was
> > > not very well specified.
> > > 
> > > What would your expectations be in this case ?
> > 
> > That is interesting, so today we have a bit of both. The most basic applications
> > needs to match two roles already (ViewFinder + X). So in term of sensor
> > configuration, without going into slow validation loops, have to cross over the
> > information across the roles.
> > 
> > So overall, I think my preference would be something structured like this:
> > 
> > Role1:
> >   + format/size
> > 	- Sensor Config1
> > 	- Sensor Config2
> > 	- Sensor Config3
> >   + format/size
> > 	- Sensor Config2
> > Role2:
> >   ...
> > 
> > The stream configuration as defined today seems well suited. Keeping the fixated
> > frameDuration away from it seems unavoidable. There is just too many variable
> 
> What do you mean by "fixated frameDuration" ?

A duration range is unfixed; a fixed duration is the final value you ask for. At
least on the sensors I've tested so far, we get some range, like 1/1 to 120/1
fps, and if you ask for 60 fps, it will drop frames for us to match as closely
as possible. At least the RPi pipeline seems to do this, since we don't do any
of this in GStreamer as far as I can remember.

In GStreamer, configurations like this are appended into GstCaps, which is a
list of structures whose fields can be fixed values, ranges or sets. This is our
go-to tool for dealing with random complexity like this one.
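
As a sketch of that fixation step (note the helper is actually spelled
gst_structure_fixate_field_nearest_fraction()):

        #include <gst/gst.h>

        /* Fixate an unfixed framerate range to the value nearest to a
         * preferred 60/1. The caps must be writable. */
        static void fixate_framerate_near_60(GstCaps *caps)
        {
                /* E.g. sensor/mode,...,framerate=[1/1,120/1] */
                GstStructure *s = gst_caps_get_structure(caps, 0);

                gst_structure_fixate_field_nearest_fraction(s, "framerate",
                                                            60, 1);
                /* "framerate" now holds the fixed fraction closest to
                 * 60/1 within the advertised range. */
        }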

> 
> > toward the fixation of the frameDuration, on top of which, applications are much
> > more flexible then they pretend. As an example, when I see:
> > 
> >    libcamerasrc ! video/x-raw,framerate=60/1 ! ...
> > 
> > I see a very naive application. I'm very doubtful that 121/2 or 119/2 would not
> > have done the equally the job. And even 50/1 would likely be fine for most use
> > cases. In GStreamer, you can (and should) use ranges when setting filters.
> > 
> >   libcamerasrc ! video/x-raw,framerate=[40/1,70/1] ! ...
> > 
> > If you have a strong preference for 60, you should hint that this way (note, as
> > I said in the review, the newly added caps negotiation is not working yet for
> > that):
> > 
> >   libcamerasrc ! video/x-raw,framerate=60/1;video/x-raw,framerate=[40/1,70/1] ! ...
> > 
> > And libcamerasrc is free to read the first structure, and same all the fixed
> > values as the "default", and later use oriented fixation function
> > (gst_structure_fixate_nearest_fraction()). This gets complex quickly, so no one
> > ever implement this ahead of time, but its all there and well defined. And quite
> > representative of how far an application can go to do the right thing.
> > 
> > How the sensor configuration would go, done by application or pipeline, would be
> > to collect all sensorConfigurations for all the streams fixated
> > streamConfiguration. Intersect, and finally score the remaining to pick the
> > best.
> 
> Do I read it right that you then expect to receive all the information
> about what streams can be produced by a Camera as if they were considered
> in isolation, and to combine them appropriately yourself at the time of
> constructing a CameraConfiguration before starting capture ?
> 

What was described here was indeed a list of streamConfigurations (as we have
now, in isolation, but that does not include the frame duration range, since
that depends on the selected sensor configuration). And for each of these
configurations, get a list of sensor configurations. The app can walk through
the fields of view and things like that, and may have constraints regarding
them. So kind of like Android, but with the raw part put into relation with the
streamConfiguration for us. And also, the raw part is now extended to include
the frame duration range (even if that could get reduced by specific processing
included for specific roles like Still).


Overall, I think we at least need a method to select some sensor-specific
config, which will inherently have to be common to every stream in our setup.
An alternative is to maybe go in two steps. Do like we do now, which is to
combine, in isolation, the constraints for each stream requested by the
application. Configure and validate the configuration. From there, enumerate
the list of other "sensor configurations" that would have worked within the
supported constraints (remember the frame duration is still not part of it).
An app that just failed on its frame duration constraint (or field of view
constraint) could then go through the list and apply global constraints like
the field of view, the maximum rate, etc. What the app can detect is that its
constraint on frame duration is not met; it could then try a sensor
configuration with a higher rate (there might be multiple valid options),
trying them in order, to see if the issue can be resolved before giving up.

> 
> Thanks for sticking with this long conversation
> 
> > 
> > Hope this quick GstCaps intro can help,
> > Nicolas
> > 
> > > 
> > > [1] https://libcamera.org/api-html/classlibcamera_1_1StreamFormats.html
> > > [2] https://libcamera.org/api-html/structlibcamera_1_1StreamConfiguration.html#a37725e6b9e179ca80be0e57ed97857d3
> > > -------------------------------------------------------------------------------
> > > 
> > > 
> > > > +	if (config == nullptr) {
> > > > +		GST_DEBUG_OBJECT(self, "Sensor mode selection is not supported, skipping enumeration.");
> > > > +		return modes;
> > > > +	}
> > > > +
> > > > +	const libcamera::StreamFormats &formats = config->at(0).formats();
> > > > +
> > > > +	for (const auto &pixfmt : formats.pixelformats()) {
> > > > +		for (const auto &size : formats.sizes(pixfmt)) {
> > > > +			config->at(0).size = size;
> > > > +			config->at(0).pixelFormat = pixfmt;
> > > > +
> > > > +			if (config->validate() == CameraConfiguration::Invalid)
> > > > +				continue;
> > > > +
> > > > +			if (state->cam_->configure(config.get()))
> > > > +				continue;
> > > > +
> > > > +			auto fd_ctrl = state->cam_->controls().find(&controls::FrameDurationLimits);
> > > > +			if (fd_ctrl == state->cam_->controls().end())
> > > > +				continue;
> > > > +
> > > > +			int minrate_num, minrate_denom;
> > > > +			int maxrate_num, maxrate_denom;
> > > > +			double min_framerate = gst_util_guint64_to_gdouble(1.0e6) /
> > > > +					       gst_util_guint64_to_gdouble(fd_ctrl->second.max().get<int64_t>());
> > > > +			double max_framerate = gst_util_guint64_to_gdouble(1.0e6) /
> > > > +					       gst_util_guint64_to_gdouble(fd_ctrl->second.min().get<int64_t>());
> > > > +			gst_util_double_to_fraction(min_framerate, &minrate_num, &minrate_denom);
> > > > +			gst_util_double_to_fraction(max_framerate, &maxrate_num, &maxrate_denom);
> > > > +
> > > > +			GstStructure *s = gst_structure_new("sensor/mode",
> > > > +							    "format", G_TYPE_STRING, pixfmt.toString().c_str(),
> > > > +							    "width", G_TYPE_INT, size.width,
> > > > +							    "height", G_TYPE_INT, size.height,
> > > > +							    "framerate", GST_TYPE_FRACTION_RANGE,
> > > > +							    minrate_num, minrate_denom,
> > > > +							    maxrate_num, maxrate_denom,
> > > > +							    nullptr);
> > > > +			gst_caps_append_structure(modes, s);
> > > > +		}
> > > > +	}
> > > > +
> > > > +	GST_DEBUG_OBJECT(self, "Camera sensor modes: %" GST_PTR_FORMAT, modes);
> > > > +
> > > > +	return modes;
> > > > +}
> > > > +
> > > >  static bool
> > > >  gst_libcamera_src_open(GstLibcameraSrc *self)
> > > >  {
> > > > @@ -375,6 +436,7 @@ gst_libcamera_src_open(GstLibcameraSrc *self)
> > > >  	/* No need to lock here, we didn't start our threads yet. */
> > > >  	self->state->cm_ = cm;
> > > >  	self->state->cam_ = cam;
> > > > +	self->sensor_modes = gst_libcamera_src_enumerate_sensor_modes(self);
> > > > 
> > > >  	return true;
> > > >  }
> > > > @@ -462,6 +524,7 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > > >  	GstLibcameraSrcState *state = self->state;
> > > >  	GstFlowReturn flow_ret = GST_FLOW_OK;
> > > >  	gint ret;
> > > > +	g_autoptr(GstCaps) sensor_mode = nullptr;
> > > > 
> > > >  	g_autoptr(GstStructure) element_caps = gst_structure_new_empty("caps");
> > > > 
> > > > @@ -481,6 +544,16 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > > >  		roles.push_back(gst_libcamera_pad_get_role(srcpad));
> > > >  	}
> > > > 
> > > > +	if (!gst_caps_is_any(self->sensor_mode)) {
> > > > +		sensor_mode = gst_caps_intersect(self->sensor_mode, self->sensor_modes);
> > > > +		if (!gst_caps_is_empty(sensor_mode)) {
> > > > +			roles.push_back(libcamera::StreamRole::Raw);
> > > > +		} else {
> > > > +			GST_WARNING_OBJECT(self, "No sensor mode matching the selection, ignoring.");
> > > > +			gst_clear_caps(&sensor_mode);
> > > > +		}
> > > > +	}
> > > > +
> > > >  	/* Generate the stream configurations, there should be one per pad. */
> > > >  	state->config_ = state->cam_->generateConfiguration(roles);
> > > >  	if (state->config_ == nullptr) {
> > > > @@ -490,7 +563,7 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > > >  		gst_task_stop(task);
> > > >  		return;
> > > >  	}
> > > > -	g_assert(state->config_->size() == state->srcpads_.size());
> > > > +	g_assert(state->config_->size() == state->srcpads_.size() + (!!sensor_mode));
> > > > 
> > > >  	for (gsize i = 0; i < state->srcpads_.size(); i++) {
> > > >  		GstPad *srcpad = state->srcpads_[i];
> > > > @@ -510,6 +583,12 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
> > > >  		gst_libcamera_get_framerate_from_caps(caps, element_caps);
> > > >  	}
> > > > 
> > > > +	if (sensor_mode) {
> > > > +		StreamConfiguration &stream_cfg = state->config_->at(state->srcpads_.size());
> > > > +		g_assert(gst_caps_is_writable(sensor_mode));
> > > > +		gst_libcamera_configure_stream_from_caps(stream_cfg, sensor_mode);
> > > > +	}
> > > > +
> > > >  	if (flow_ret != GST_FLOW_OK)
> > > >  		goto done;
> > > > 
> > > > @@ -624,6 +703,7 @@ gst_libcamera_src_task_leave([[maybe_unused]] GstTask *task,
> > > >  	g_clear_object(&self->allocator);
> > > >  	g_clear_pointer(&self->flow_combiner,
> > > >  			(GDestroyNotify)gst_flow_combiner_free);
> > > > +	gst_clear_caps(&self->sensor_modes);
> > > >  }
> > > > 
> > > >  static void
> > > > @@ -659,6 +739,10 @@ gst_libcamera_src_set_property(GObject *object, guint prop_id,
> > > >  		g_free(self->camera_name);
> > > >  		self->camera_name = g_value_dup_string(value);
> > > >  		break;
> > > > +	case PROP_SENSOR_MODE:
> > > > +		gst_clear_caps(&self->sensor_mode);
> > > > +		self->sensor_mode = GST_CAPS(g_value_dup_boxed(value));
> > > > +		break;
> > > >  	default:
> > > >  		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
> > > >  		break;
> > > > @@ -676,6 +760,12 @@ gst_libcamera_src_get_property(GObject *object, guint prop_id, GValue *value,
> > > >  	case PROP_CAMERA_NAME:
> > > >  		g_value_set_string(value, self->camera_name);
> > > >  		break;
> > > > +	case PROP_SENSOR_MODES:
> > > > +		g_value_set_boxed(value, self->sensor_modes);
> > > > +		break;
> > > > +	case PROP_SENSOR_MODE:
> > > > +		g_value_set_boxed(value, self->sensor_mode);
> > > > +		break;
> > > >  	default:
> > > >  		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
> > > >  		break;
> > > > @@ -763,6 +853,7 @@ gst_libcamera_src_init(GstLibcameraSrc *self)
> > > >  	/* C-style friend. */
> > > >  	state->src_ = self;
> > > >  	self->state = state;
> > > > +	self->sensor_mode = gst_caps_new_any();
> > > >  }
> > > > 
> > > >  static GstPad *
> > > > @@ -844,4 +935,19 @@ gst_libcamera_src_class_init(GstLibcameraSrcClass *klass)
> > > >  							     | G_PARAM_READWRITE
> > > >  							     | G_PARAM_STATIC_STRINGS));
> > > >  	g_object_class_install_property(object_class, PROP_CAMERA_NAME, spec);
> > > > +
> > > > +	spec = g_param_spec_boxed("sensor-modes", "Sensor Modes",
> > > > +				  "GstCaps representing available sensor modes.",
> > > > +				  GST_TYPE_CAPS,
> > > > +				  (GParamFlags)(G_PARAM_READABLE
> > > > +					        | G_PARAM_STATIC_STRINGS));
> > > > +	g_object_class_install_property(object_class, PROP_SENSOR_MODES, spec);
> > > > +
> > > > +	spec = g_param_spec_boxed("sensor-mode", "Sensor Mode",
> > > > +				  "GstCaps representing selected sensor mode.",
> > > > +				  GST_TYPE_CAPS,
> > > > +				  (GParamFlags)(GST_PARAM_MUTABLE_READY
> > > > +					        | G_PARAM_READWRITE
> > > > +					        | G_PARAM_STATIC_STRINGS));
> > > > +	g_object_class_install_property(object_class, PROP_SENSOR_MODE, spec);
> > > >  }
> > > > --
> > > > 2.39.2
> > > > 
> >

Patch
diff mbox series

diff --git a/src/gstreamer/gstlibcamera-utils.cpp b/src/gstreamer/gstlibcamera-utils.cpp
index 750ec351..c8a8df09 100644
--- a/src/gstreamer/gstlibcamera-utils.cpp
+++ b/src/gstreamer/gstlibcamera-utils.cpp
@@ -416,6 +416,10 @@  gst_libcamera_configure_stream_from_caps(StreamConfiguration &stream_cfg,
 		stream_cfg.pixelFormat = gst_format_to_pixel_format(gst_format);
 	} else if (gst_structure_has_name(s, "image/jpeg")) {
 		stream_cfg.pixelFormat = formats::MJPEG;
+	} else if (gst_structure_has_name(s, "sensor/mode")) {
+		gst_structure_fixate_field(s, "format");
+		const gchar *format = gst_structure_get_string(s, "format");
+		stream_cfg.pixelFormat = PixelFormat::fromString(format);
 	} else {
 		g_critical("Unsupported media type: %s", gst_structure_get_name(s));
 	}
diff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp
index a10cbd4f..2f05a03f 100644
--- a/src/gstreamer/gstlibcamerasrc.cpp
+++ b/src/gstreamer/gstlibcamerasrc.cpp
@@ -147,6 +147,9 @@  struct _GstLibcameraSrc {
 
 	gchar *camera_name;
 
+	GstCaps *sensor_modes;
+	GstCaps *sensor_mode;
+
 	GstLibcameraSrcState *state;
 	GstLibcameraAllocator *allocator;
 	GstFlowCombiner *flow_combiner;
@@ -154,7 +157,10 @@  struct _GstLibcameraSrc {
 
 enum {
 	PROP_0,
-	PROP_CAMERA_NAME
+	PROP_CAMERA_NAME,
+	PROP_SENSOR_MODES,
+	PROP_SENSOR_MODE,
+	PROP_LAST
 };
 
 G_DEFINE_TYPE_WITH_CODE(GstLibcameraSrc, gst_libcamera_src, GST_TYPE_ELEMENT,
@@ -318,6 +324,61 @@  int GstLibcameraSrcState::processRequest()
 	return err;
 }
 
+static GstCaps *
+gst_libcamera_src_enumerate_sensor_modes(GstLibcameraSrc *self)
+{
+	GstCaps *modes = gst_caps_new_empty();
+	GstLibcameraSrcState *state = self->state;
+	auto config = state->cam_->generateConfiguration({ libcamera::StreamRole::Raw,
+							   libcamera::StreamRole::VideoRecording });
+	if (config == nullptr) {
+		GST_DEBUG_OBJECT(self, "Sensor mode selection is not supported, skipping enumeration.");
+		return modes;
+	}
+
+	const libcamera::StreamFormats &formats = config->at(0).formats();
+
+	for (const auto &pixfmt : formats.pixelformats()) {
+		for (const auto &size : formats.sizes(pixfmt)) {
+			config->at(0).size = size;
+			config->at(0).pixelFormat = pixfmt;
+
+			if (config->validate() == CameraConfiguration::Invalid)
+				continue;
+
+			if (state->cam_->configure(config.get()))
+				continue;
+
+			auto fd_ctrl = state->cam_->controls().find(&controls::FrameDurationLimits);
+			if (fd_ctrl == state->cam_->controls().end())
+				continue;
+
+			int minrate_num, minrate_denom;
+			int maxrate_num, maxrate_denom;
+			double min_framerate = gst_util_guint64_to_gdouble(1.0e6) /
+					       gst_util_guint64_to_gdouble(fd_ctrl->second.max().get<int64_t>());
+			double max_framerate = gst_util_guint64_to_gdouble(1.0e6) /
+					       gst_util_guint64_to_gdouble(fd_ctrl->second.min().get<int64_t>());
+			gst_util_double_to_fraction(min_framerate, &minrate_num, &minrate_denom);
+			gst_util_double_to_fraction(max_framerate, &maxrate_num, &maxrate_denom);
+
+			GstStructure *s = gst_structure_new("sensor/mode",
+							    "format", G_TYPE_STRING, pixfmt.toString().c_str(),
+							    "width", G_TYPE_INT, size.width,
+							    "height", G_TYPE_INT, size.height,
+							    "framerate", GST_TYPE_FRACTION_RANGE,
+							    minrate_num, minrate_denom,
+							    maxrate_num, maxrate_denom,
+							    nullptr);
+			gst_caps_append_structure(modes, s);
+		}
+	}
+
+	GST_DEBUG_OBJECT(self, "Camera sensor modes: %" GST_PTR_FORMAT, modes);
+
+	return modes;
+}
+
 static bool
 gst_libcamera_src_open(GstLibcameraSrc *self)
 {
@@ -375,6 +436,7 @@  gst_libcamera_src_open(GstLibcameraSrc *self)
 	/* No need to lock here, we didn't start our threads yet. */
 	self->state->cm_ = cm;
 	self->state->cam_ = cam;
+	self->sensor_modes = gst_libcamera_src_enumerate_sensor_modes(self);
 
 	return true;
 }
@@ -462,6 +524,7 @@  gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
 	GstLibcameraSrcState *state = self->state;
 	GstFlowReturn flow_ret = GST_FLOW_OK;
 	gint ret;
+	g_autoptr(GstCaps) sensor_mode = nullptr;
 
 	g_autoptr(GstStructure) element_caps = gst_structure_new_empty("caps");
 
@@ -481,6 +544,16 @@  gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
 		roles.push_back(gst_libcamera_pad_get_role(srcpad));
 	}
 
+	if (!gst_caps_is_any(self->sensor_mode)) {
+		sensor_mode = gst_caps_intersect(self->sensor_mode, self->sensor_modes);
+		if (!gst_caps_is_empty(sensor_mode)) {
+			roles.push_back(libcamera::StreamRole::Raw);
+		} else {
+			GST_WARNING_OBJECT(self, "No sensor mode matching the selection, ignoring.");
+			gst_clear_caps(&sensor_mode);
+		}
+	}
+
 	/* Generate the stream configurations, there should be one per pad. */
 	state->config_ = state->cam_->generateConfiguration(roles);
 	if (state->config_ == nullptr) {
@@ -490,7 +563,7 @@  gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
 		gst_task_stop(task);
 		return;
 	}
-	g_assert(state->config_->size() == state->srcpads_.size());
+	g_assert(state->config_->size() == state->srcpads_.size() + (!!sensor_mode));
 
 	for (gsize i = 0; i < state->srcpads_.size(); i++) {
 		GstPad *srcpad = state->srcpads_[i];
@@ -510,6 +583,12 @@  gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,
 		gst_libcamera_get_framerate_from_caps(caps, element_caps);
 	}
 
+	if (sensor_mode) {
+		StreamConfiguration &stream_cfg = state->config_->at(state->srcpads_.size());
+		g_assert(gst_caps_is_writable(sensor_mode));
+		gst_libcamera_configure_stream_from_caps(stream_cfg, sensor_mode);
+	}
+
 	if (flow_ret != GST_FLOW_OK)
 		goto done;
 
@@ -624,6 +703,7 @@  gst_libcamera_src_task_leave([[maybe_unused]] GstTask *task,
 	g_clear_object(&self->allocator);
 	g_clear_pointer(&self->flow_combiner,
 			(GDestroyNotify)gst_flow_combiner_free);
+	gst_clear_caps(&self->sensor_modes);
 }
 
 static void
@@ -659,6 +739,10 @@  gst_libcamera_src_set_property(GObject *object, guint prop_id,
 		g_free(self->camera_name);
 		self->camera_name = g_value_dup_string(value);
 		break;
+	case PROP_SENSOR_MODE:
+		gst_clear_caps(&self->sensor_mode);
+		self->sensor_mode = GST_CAPS(g_value_dup_boxed(value));
+		break;
 	default:
 		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
 		break;
@@ -676,6 +760,12 @@  gst_libcamera_src_get_property(GObject *object, guint prop_id, GValue *value,
 	case PROP_CAMERA_NAME:
 		g_value_set_string(value, self->camera_name);
 		break;
+	case PROP_SENSOR_MODES:
+		g_value_set_boxed(value, self->sensor_modes);
+		break;
+	case PROP_SENSOR_MODE:
+		g_value_set_boxed(value, self->sensor_mode);
+		break;
 	default:
 		G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
 		break;
@@ -763,6 +853,7 @@  gst_libcamera_src_init(GstLibcameraSrc *self)
 	/* C-style friend. */
 	state->src_ = self;
 	self->state = state;
+	self->sensor_mode = gst_caps_new_any();
 }
 
 static GstPad *
@@ -844,4 +935,19 @@  gst_libcamera_src_class_init(GstLibcameraSrcClass *klass)
 							     | G_PARAM_READWRITE
 							     | G_PARAM_STATIC_STRINGS));
 	g_object_class_install_property(object_class, PROP_CAMERA_NAME, spec);
+
+	spec = g_param_spec_boxed("sensor-modes", "Sensor Modes",
+				  "GstCaps representing available sensor modes.",
+				  GST_TYPE_CAPS,
+				  (GParamFlags)(G_PARAM_READABLE
+					        | G_PARAM_STATIC_STRINGS));
+	g_object_class_install_property(object_class, PROP_SENSOR_MODES, spec);
+
+	spec = g_param_spec_boxed("sensor-mode", "Sensor Mode",
+				  "GstCaps representing selected sensor mode.",
+				  GST_TYPE_CAPS,
+				  (GParamFlags)(GST_PARAM_MUTABLE_READY
+					        | G_PARAM_READWRITE
+					        | G_PARAM_STATIC_STRINGS));
+	g_object_class_install_property(object_class, PROP_SENSOR_MODE, spec);
 }
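
For reference, application-side usage of the two properties added by this
patch could look like the following sketch. The element and property names
follow the patch above; the 30 fps filter and the helper function are just
for illustration, and error handling is omitted:

#include <gst/gst.h>

/*
 * Create a libcamerasrc, list the available sensor modes and select
 * any mode that can sustain 30 fps.
 */
static GstElement *
create_libcamerasrc_with_30fps_mode(void)
{
	GstElement *src = gst_element_factory_make("libcamerasrc", NULL);

	/* sensor-modes is only filled once the camera is opened, i.e.
	 * when the element reaches READY. */
	gst_element_set_state(src, GST_STATE_READY);

	g_autoptr(GstCaps) modes = NULL;
	g_object_get(src, "sensor-modes", &modes, NULL);
	gst_println("available sensor modes: %" GST_PTR_FORMAT, modes);

	/* The element intersects this filter with the supported modes
	 * and picks the best match. */
	g_autoptr(GstCaps) wanted =
		gst_caps_from_string("sensor/mode,framerate=30/1");
	g_object_set(src, "sensor-mode", wanted, NULL);

	return src;
}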