[libcamera-devel,v3,1/4] libcamera: property: Add QueueDepth property

Message ID 20210421165139.318432-2-nfraprado@collabora.com
State Superseded
Series: lc-compliance: Add test to queue more requests than hardware depth

Commit Message

Nícolas F. R. A. Prado April 21, 2021, 4:51 p.m. UTC
The QueueDepth property reports the minimum amount of requests needed in
the camera pipeline.

Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
---
 src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
 src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
 src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
 src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
 src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
 src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
 src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
 src/libcamera/property_ids.yaml                    | 5 +++++
 8 files changed, 26 insertions(+), 4 deletions(-)

Comments

Naushir Patuck April 22, 2021, 8:07 a.m. UTC | #1
Hi Nícolas,

Thank you for your work.

On Wed, 21 Apr 2021 at 17:52, Nícolas F. R. A. Prado <
nfraprado@collabora.com> wrote:

> The QueueDepth property reports the minimum amount of requests needed in
> the camera pipeline.
>
> Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> ---
>  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
>  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
>  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
>  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
>  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
>  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
>  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
>  src/libcamera/property_ids.yaml                    | 5 +++++
>  8 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp
> b/src/libcamera/pipeline/ipu3/ipu3.cpp
> index 51446fcf5bc1..6067db2f37a3 100644
> --- a/src/libcamera/pipeline/ipu3/ipu3.cpp
> +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
> @@ -1049,6 +1049,10 @@ int PipelineHandlerIPU3::registerCameras()
>                 /* Initialize the camera properties. */
>                 data->properties_ = cio2->sensor()->properties();
>
> +               /* TODO This can be changed to CIO2 after configuration,
> but
> +                * both are 4 currently */
> +               data->properties_.set(properties::QueueDepth, 4);
> +
>                 ret = initControls(data.get());
>                 if (ret)
>                         continue;
> diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> index 2a917455500f..8d1ade3a4352 100644
> --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> @@ -1035,6 +1035,10 @@ bool PipelineHandlerRPi::match(DeviceEnumerator
> *enumerator)
>         /* Initialize the camera properties. */
>         data->properties_ = data->sensor_->properties();
>
> +       /* TODO Can be 1, 2 or 4 depending on configuration, for now use
> the max
> +        * which is 4 */
> +       data->properties_.set(properties::QueueDepth, 4);
> +
>

Going by the definition given in the cover letter for this patch:

> So patch 1 adds the new QueueDepth property to report the minimum amount
of
> requests needed by the pipeline handler.

This value should be set to 1 for the Raspberry Pi pipeline handler.

Regards,
Naush



>         /*
>          * Set a default value for the ScalerCropMaximum property to show
>          * that we support its use, however, initialise it to zero because
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> index 549f4a4e61a8..7d876e9387d7 100644
> --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> @@ -21,6 +21,7 @@
>  #include <libcamera/ipa/core_ipa_interface.h>
>  #include <libcamera/ipa/rkisp1_ipa_interface.h>
>  #include <libcamera/ipa/rkisp1_ipa_proxy.h>
> +#include <libcamera/property_ids.h>
>  #include <libcamera/request.h>
>  #include <libcamera/stream.h>
>
> @@ -940,6 +941,7 @@ int PipelineHandlerRkISP1::createCamera(MediaEntity
> *sensor)
>
>         /* Initialize the camera properties. */
>         data->properties_ = data->sensor_->properties();
> +       data->properties_.set(properties::QueueDepth, RKISP1_BUFFER_COUNT);
>
>         /*
>          * \todo Read dealy values from the sensor itself or from a
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> index 3b3e37d258d0..7540dd41ad84 100644
> --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> @@ -26,6 +26,8 @@ class V4L2Subdevice;
>  struct StreamConfiguration;
>  struct V4L2SubdeviceFormat;
>
> +static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> +
>  class RkISP1Path
>  {
>  public:
> @@ -56,8 +58,6 @@ public:
>         Signal<FrameBuffer *> &bufferReady() { return video_->bufferReady;
> }
>
>  private:
> -       static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> -
>         const char *name_;
>         bool running_;
>
> diff --git a/src/libcamera/pipeline/simple/simple.cpp
> b/src/libcamera/pipeline/simple/simple.cpp
> index f6095d38e97a..6ee24f2f14e8 100644
> --- a/src/libcamera/pipeline/simple/simple.cpp
> +++ b/src/libcamera/pipeline/simple/simple.cpp
> @@ -22,6 +22,7 @@
>  #include <linux/media-bus-format.h>
>
>  #include <libcamera/camera.h>
> +#include <libcamera/property_ids.h>
>  #include <libcamera/request.h>
>  #include <libcamera/stream.h>
>
> @@ -141,6 +142,8 @@ static const SimplePipelineInfo supportedDevices[] = {
>
>  } /* namespace */
>
> +static constexpr unsigned int kNumInternalBuffers = 3;
> +
>  class SimpleCameraData : public CameraData
>  {
>  public:
> @@ -238,8 +241,6 @@ protected:
>         int queueRequestDevice(Camera *camera, Request *request) override;
>
>  private:
> -       static constexpr unsigned int kNumInternalBuffers = 3;
> -
>         SimpleCameraData *cameraData(const Camera *camera)
>         {
>                 return static_cast<SimpleCameraData *>(
> @@ -424,6 +425,7 @@ int SimpleCameraData::init()
>         }
>
>         properties_ = sensor_->properties();
> +       properties_.set(properties::QueueDepth, kNumInternalBuffers);
>
>         return 0;
>  }
> diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> index b6c6ade5ebaf..591f46b60d23 100644
> --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> @@ -525,6 +525,8 @@ int UVCCameraData::init(MediaDevice *media)
>         properties_.set(properties::PixelArraySize, resolution);
>         properties_.set(properties::PixelArrayActiveAreas, {
> Rectangle(resolution) });
>
> +       properties_.set(properties::QueueDepth, 4);
> +
>         /* Initialise the supported controls. */
>         ControlInfoMap::Map ctrls;
>
> diff --git a/src/libcamera/pipeline/vimc/vimc.cpp
> b/src/libcamera/pipeline/vimc/vimc.cpp
> index 8f5f4ba30953..605b3fe89152 100644
> --- a/src/libcamera/pipeline/vimc/vimc.cpp
> +++ b/src/libcamera/pipeline/vimc/vimc.cpp
> @@ -20,6 +20,7 @@
>  #include <libcamera/formats.h>
>  #include <libcamera/ipa/ipa_interface.h>
>  #include <libcamera/ipa/ipa_module_info.h>
> +#include <libcamera/property_ids.h>
>  #include <libcamera/request.h>
>  #include <libcamera/stream.h>
>
> @@ -516,6 +517,8 @@ int VimcCameraData::init()
>         /* Initialize the camera properties. */
>         properties_ = sensor_->properties();
>
> +       properties_.set(properties::QueueDepth, 4);
> +
>         return 0;
>  }
>
> diff --git a/src/libcamera/property_ids.yaml
> b/src/libcamera/property_ids.yaml
> index 104e9aaf4fa3..0b7d1271a26b 100644
> --- a/src/libcamera/property_ids.yaml
> +++ b/src/libcamera/property_ids.yaml
> @@ -678,6 +678,11 @@ controls:
>          \todo Turn this property into a "maximum control value" for the
>          ScalerCrop control once "dynamic" controls have been implemented.
>
> +  - QueueDepth:
> +      type: int32_t
> +      description: |
> +        Minimum amount of requests needed in the camera pipeline.
> +
>    #
> ----------------------------------------------------------------------------
>    # Draft properties section
>
> --
> 2.31.1
>
> _______________________________________________
> libcamera-devel mailing list
> libcamera-devel@lists.libcamera.org
> https://lists.libcamera.org/listinfo/libcamera-devel
>
Laurent Pinchart April 26, 2021, 2:40 a.m. UTC | #2
Hi Nícolas,

Thank you for the patch.

On Wed, Apr 21, 2021 at 01:51:36PM -0300, Nícolas F. R. A. Prado wrote:
> The QueueDepth property reports the minimum amount of requests needed in
> the camera pipeline.
> 
> Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> ---
>  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
>  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
>  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
>  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
>  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
>  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
>  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
>  src/libcamera/property_ids.yaml                    | 5 +++++
>  8 files changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp b/src/libcamera/pipeline/ipu3/ipu3.cpp
> index 51446fcf5bc1..6067db2f37a3 100644
> --- a/src/libcamera/pipeline/ipu3/ipu3.cpp
> +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
> @@ -1049,6 +1049,10 @@ int PipelineHandlerIPU3::registerCameras()
>  		/* Initialize the camera properties. */
>  		data->properties_ = cio2->sensor()->properties();
>  
> +		/* TODO This can be changed to CIO2 after configuration, but
> +		 * both are 4 currently */

		/*
		 * \todo This can be changed to CIO2 after configuration, but
		 * both are 4 currently
		 */

The \todo is doxygen style and is used through the code base. The
placement of /* */ is our standard coding style. Same below.

> +
> +
> +		data->properties_.set(properties::QueueDepth, 4);

I think you can already lower the value, to 1 or 2 depending on the
definition of the property (see below).

> +
>  		ret = initControls(data.get());
>  		if (ret)
>  			continue;
> diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> index 2a917455500f..8d1ade3a4352 100644
> --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> @@ -1035,6 +1035,10 @@ bool PipelineHandlerRPi::match(DeviceEnumerator *enumerator)
>  	/* Initialize the camera properties. */
>  	data->properties_ = data->sensor_->properties();
>  
> +	/* TODO Can be 1, 2 or 4 depending on configuration, for now use the max
> +	 * which is 4 */
> +	data->properties_.set(properties::QueueDepth, 4);

Same here, Naush commented that this should be 1, but depending on how
we define the control, 2 could be a better value.

> +
>  	/*
>  	 * Set a default value for the ScalerCropMaximum property to show
>  	 * that we support its use, however, initialise it to zero because
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> index 549f4a4e61a8..7d876e9387d7 100644
> --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> @@ -21,6 +21,7 @@
>  #include <libcamera/ipa/core_ipa_interface.h>
>  #include <libcamera/ipa/rkisp1_ipa_interface.h>
>  #include <libcamera/ipa/rkisp1_ipa_proxy.h>
> +#include <libcamera/property_ids.h>
>  #include <libcamera/request.h>
>  #include <libcamera/stream.h>
>  
> @@ -940,6 +941,7 @@ int PipelineHandlerRkISP1::createCamera(MediaEntity *sensor)
>  
>  	/* Initialize the camera properties. */
>  	data->properties_ = data->sensor_->properties();
> +	data->properties_.set(properties::QueueDepth, RKISP1_BUFFER_COUNT);

Here 3 would be a better value, to match the driver. However, I wonder
if the hardware really needs 3 buffers. Dafna, Helen, do you have
information about this ? A quick glance at the driver makes me think we
could lower the number of buffers to 2. Lowering it to 1 may be nice,
but probably more tricky.

>  
>  	/*
>  	 * \todo Read dealy values from the sensor itself or from a
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> index 3b3e37d258d0..7540dd41ad84 100644
> --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> @@ -26,6 +26,8 @@ class V4L2Subdevice;
>  struct StreamConfiguration;
>  struct V4L2SubdeviceFormat;
>  
> +static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> +
>  class RkISP1Path
>  {
>  public:
> @@ -56,8 +58,6 @@ public:
>  	Signal<FrameBuffer *> &bufferReady() { return video_->bufferReady; }
>  
>  private:
> -	static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> -
>  	const char *name_;
>  	bool running_;
>  
> diff --git a/src/libcamera/pipeline/simple/simple.cpp b/src/libcamera/pipeline/simple/simple.cpp
> index f6095d38e97a..6ee24f2f14e8 100644
> --- a/src/libcamera/pipeline/simple/simple.cpp
> +++ b/src/libcamera/pipeline/simple/simple.cpp
> @@ -22,6 +22,7 @@
>  #include <linux/media-bus-format.h>
>  
>  #include <libcamera/camera.h>
> +#include <libcamera/property_ids.h>
>  #include <libcamera/request.h>
>  #include <libcamera/stream.h>
>  
> @@ -141,6 +142,8 @@ static const SimplePipelineInfo supportedDevices[] = {
>  
>  } /* namespace */
>  
> +static constexpr unsigned int kNumInternalBuffers = 3;
> +
>  class SimpleCameraData : public CameraData
>  {
>  public:
> @@ -238,8 +241,6 @@ protected:
>  	int queueRequestDevice(Camera *camera, Request *request) override;
>  
>  private:
> -	static constexpr unsigned int kNumInternalBuffers = 3;
> -

Is this change needed ?

>  	SimpleCameraData *cameraData(const Camera *camera)
>  	{
>  		return static_cast<SimpleCameraData *>(
> @@ -424,6 +425,7 @@ int SimpleCameraData::init()
>  	}
>  
>  	properties_ = sensor_->properties();
> +	properties_.set(properties::QueueDepth, kNumInternalBuffers);
>  
>  	return 0;
>  }
> diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> index b6c6ade5ebaf..591f46b60d23 100644
> --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> @@ -525,6 +525,8 @@ int UVCCameraData::init(MediaDevice *media)
>  	properties_.set(properties::PixelArraySize, resolution);
>  	properties_.set(properties::PixelArrayActiveAreas, { Rectangle(resolution) });
>  
> +	properties_.set(properties::QueueDepth, 4);

You can set this to 1 or 2 (again, see below).

> +
>  	/* Initialise the supported controls. */
>  	ControlInfoMap::Map ctrls;
>  
> diff --git a/src/libcamera/pipeline/vimc/vimc.cpp b/src/libcamera/pipeline/vimc/vimc.cpp
> index 8f5f4ba30953..605b3fe89152 100644
> --- a/src/libcamera/pipeline/vimc/vimc.cpp
> +++ b/src/libcamera/pipeline/vimc/vimc.cpp
> @@ -20,6 +20,7 @@
>  #include <libcamera/formats.h>
>  #include <libcamera/ipa/ipa_interface.h>
>  #include <libcamera/ipa/ipa_module_info.h>
> +#include <libcamera/property_ids.h>
>  #include <libcamera/request.h>
>  #include <libcamera/stream.h>
>  
> @@ -516,6 +517,8 @@ int VimcCameraData::init()
>  	/* Initialize the camera properties. */
>  	properties_ = sensor_->properties();
>  
> +	properties_.set(properties::QueueDepth, 4);

And here, 2 should be enough.

> +
>  	return 0;
>  }
>  
> diff --git a/src/libcamera/property_ids.yaml b/src/libcamera/property_ids.yaml
> index 104e9aaf4fa3..0b7d1271a26b 100644
> --- a/src/libcamera/property_ids.yaml
> +++ b/src/libcamera/property_ids.yaml
> @@ -678,6 +678,11 @@ controls:
>          \todo Turn this property into a "maximum control value" for the
>          ScalerCrop control once "dynamic" controls have been implemented.
>  
> +  - QueueDepth:
> +      type: int32_t
> +      description: |
> +        Minimum amount of requests needed in the camera pipeline.

I think this needs to be better defined.

First of all, the name QueueDepth doesn't really relate to concepts that
are explicitly defined in libcamera. We should thus either define those
concepts, or, if we want to define the property in a self-contained way,
change the name to match the description of the property. This could for
instance become MinNumRequests.
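
Whatever we end up calling it, here is roughly how I picture an application
consuming the property (a minimal sketch only, written against the current API
from memory; the function name, the error handling and the ownership of the
requests vector are illustrative, and the camera is assumed to be already
acquired and configured, with buffers allocated for the stream):

	#include <errno.h>

	#include <memory>
	#include <vector>

	#include <libcamera/camera.h>
	#include <libcamera/framebuffer_allocator.h>
	#include <libcamera/property_ids.h>
	#include <libcamera/request.h>
	#include <libcamera/stream.h>

	using namespace libcamera;

	/*
	 * Queue at least the advertised minimum number of requests and start
	 * streaming. The requests vector is owned by the caller and must
	 * outlive the capture session.
	 */
	int startWithMinimumQueue(std::shared_ptr<Camera> camera, Stream *stream,
				  FrameBufferAllocator &allocator,
				  std::vector<std::unique_ptr<Request>> &requests)
	{
		/* The property introduced by this patch, whatever its final name. */
		int32_t minRequests = camera->properties().get(properties::QueueDepth);

		for (const std::unique_ptr<FrameBuffer> &buffer : allocator.buffers(stream)) {
			std::unique_ptr<Request> request = camera->createRequest();
			if (!request)
				return -ENOMEM;

			request->addBuffer(stream, buffer.get());
			requests.push_back(std::move(request));
		}

		/* Not enough buffers to satisfy the advertised minimum. */
		if (static_cast<int32_t>(requests.size()) < minRequests)
			return -EINVAL;

		camera->start();
		for (std::unique_ptr<Request> &request : requests)
			camera->queueRequest(request.get());

		/*
		 * Completed requests are then reused and requeued from the
		 * requestCompleted handler to keep the queue above the minimum.
		 */
		return 0;
	}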

Then, we should also expand the definition of the control. This requires
a bit of design work. What do we want to convey here ? Let's start the
brainstorming session, which means you can challenge anything I'll
propose here, and present any new idea.

There are multiple reasons why we need a minimum number of requests
queued:

- Fulfilling driver requirements, which are related to hardware
  requirements.

  It is common for video capture hardware to require multiple buffers to
  be queued to the hardware. A very common situation (mostly seen in
  low-end hardware) is to have two buffer address registers for the
  capture DMA engine, and ping-pong between the two of them. The
  hardware in that case can't deal with buffer underruns, so the driver
  needs to supply two buffers before capture can be started. The driver
  is also limited by the hardware when it comes to buffer completion.
  When a frame capture completes, a new buffer is needed to replace the
  completed one. Drivers usually hold on to the completed buffer if no new
  buffer is available, so that the DMA engine will always have two
  buffers available. This means that queueing two buffers would start
  the capture, and a third buffer is needed for the driver to send
  completed buffers back to userspace. A similar situation occurs when
  the hardware uses shadow registers instead of an explicit ping-pong
  mechanism.

  There are several things that drivers can (and should) do to minimize
  the impact on userspace. One of them would be to allocate a scratch
  buffer to be used when userspace doesn't provide buffers fast enough.
  This is costly in terms of memory. Userspace is in a better position
  to handle scratch buffers, and skip them if it can guarantee that
  there will be no buffer queue underrun. However, when the device is
  behind an IOMMU, a single physical page can be allocated and mapped to
  the device as a large DMA area that spans a full frame, which makes
  scratch buffers cheap if handled by the kernel.

  Another contingency measure is possible in the ping-pong case. If the
  hardware requires buffers A and B to be available, capture can start
  immediately when two buffers (let's call them 1 and 2) are queued.
  When capture to A completes, the driver needs to reprogram the address
  of buffer A. If a third buffer is available in the queue at that time,
  it will use it.  Otherwise, it could reuse buffer 2 to program address
  A. This means that, when capture to B completes, that frame will be
  dropped, as the buffer will be reused immediately for A. This makes it
  possible to dequeue buffers with only two buffers queued instead of
  three. Using fewer than three buffers would, however, still result in
  frames being dropped (the toy model after this list walks through this
  case).

  Note that these constraints should be applicable to inline hardware
  only. For offline ISPs (memory-to-memory), the hardware shouldn't have
  such constraints, and should be fine with starting processing as soon
  as one buffer is available, and with stopping processing until a new buffer
  is available in case of a buffer queue underrun. That's at least the
  case for sane hardware, and we may not be able to completely exclude
  insanity on the hardware side.

- Peeking ahead in the request parameters.

  It can take time for some parameters to be applied. For instance, it's
  quite typical for the camera sensor's exposure time and gain to require
  2 and 1 frames respectively to be applied. This means that, to guarantee
  proper function of per-frame control, we need to have parameters
  available in advance. When running with auto-exposure enabled this may
  be less of a problem for the exposure time and gain, but if the
  application wants to control those parameters manually, we need enough
  requests in the queue.

  Generally speaking, a camera pipeline can be composed of multiple
  processing stages. A typical case has a camera sensor connected to an
  inline ISP (or just a CSI-2 receiver) to capture raw images, followed
  by an offline (memory-to-memory) ISP to process raw frames and output
  YUV. The result can then be encoded to JPEG by a memory-to-memory
  encoder (this is only relevant for the Android camera HAL
  implementation, the libcamera core doesn't deal with JPEG encoding).
  While frame N is being encoded to JPEG, frame N+1 is being processed
  by the offline ISP, frame N+2 is being read out from the sensor (and
  possibly processed by the inline ISP), and frame N+3 may be exposed.
  Even if the hardware doesn't require more than one buffer to be queued
  to function, if request N contains parameters for the camera sensor,
  for the offline ISP, and for the JPEG encoder, those parameters need
  to be available to libcamera three frames earlier to be applied at the
  right time.

  Note that some processing steps could in theory take more than the
  time of one frame to complete, if they're pipelined at the hardware
  level, or if they can be run in parallel for multiple frames (for
  instance we could have two JPEG encoders that take the time of one
  frame and a half to encode a frame). I'm not sure if this would impact
  what we need to expose to applications.
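
To make the ping-pong scenario above a little more concrete, here is a small
toy model (purely illustrative, it doesn't correspond to any real driver or to
the V4L2 API) of two DMA slots being reprogrammed on frame completion, with
userspace requeueing every buffer it gets back as soon as possible:

	#include <deque>
	#include <iostream>

	int main()
	{
		/* Try a total of 2, 3 and 4 buffers cycling through the queue. */
		for (int total : { 2, 3, 4 }) {
			std::deque<int> queue;	/* buffers queued by userspace */
			for (int i = 0; i < total; ++i)
				queue.push_back(i);

			/* Capture starts once both DMA slots are programmed. */
			int slot[2] = { queue[0], queue[1] };
			queue.pop_front();
			queue.pop_front();

			int held = 0;
			for (int frame = 0; frame < 8; ++frame) {
				int completed = frame % 2; /* slot that just finished */
				if (!queue.empty()) {
					/* Return the completed buffer, program a new one. */
					int done = slot[completed];
					slot[completed] = queue.front();
					queue.pop_front();
					/* Userspace requeues it immediately. */
					queue.push_back(done);
				} else {
					/* No replacement available, hold on to the buffer. */
					held++;
				}
			}

			std::cout << total << " buffers: " << held
				  << "/8 completions held back by the driver\n";
		}

		return 0;
	}

With two buffers the capture runs but no completion ever reaches userspace;
the third buffer is what allows the driver to hand buffers back, and the
contingency measure described above trades that third buffer for dropped
frames.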

The question is how to translate this to requirements exposed to
applications. Going back to the IPU3 and RPi pipeline handlers, both
have the same hardware architecture, with a CSI-2 receiver capturing to
memory, and an offline ISP. The hardware and drivers will operate with a
single buffer, but the inline part will drop frames if we don't have at
least two buffers for raw frames (which may be internal buffers if the
application only captures YUV streams, or application-provided buffers
if the application also captures a RAW stream). A minimum of 1 request
will be able to capture frames in suboptimal conditions. 2 requests
would be needed to avoid dropping frames, and 3 requests to ensure
manual exposure time can be applied early enough (a 4th request would
then be needed to handle the JPEG encoding, but as that's for the
Android camera HAL implementation only, the libcamera core doesn't need
to consider it). Note that, if IPA algorithms are available, we could
possibly get away with fewer requests as the IPA could compute parameters
earlier, but we'd be screwed if a request disables algorithms, which we
can't really predict.

Let's also note that this doesn't mean that we need to have 3 or 4
buffers available, the buffers are only needed on the output side of the
pipeline, later than the parameters, but we currently have no way to
decouple buffers and controls.

What should we report in such a case ? I think we need to provide enough
information for applications to implement the optimal case (no frame
dropped, controls applied on the right frame, ...). We probably also
want to let applications know what the bare minimum is in order to
capture frames in suboptimal conditions. For instance, an application
may want to capture frames with a very low frame rate, or even capture a
single snapshot, and it would probably be overkill to require queuing
more than one request in that case. That leaves open the question of how
to deal with drivers that need more than one buffer to operate (the
non-IOMMU case above). Do we want to support them or can we draw the
line there (I think we should support them, unfortunately) ? Do we want
to allocate scratch buffers in the pipeline handler to make this
completely transparent to the application, which would result in a waste
of memory if the application provides us with enough buffers at all
times (and would also give us "interesting" problems to solve, such as
deciding when to queue a scratch buffer at start time) ? In many cases
the IPA needs to run for a few frames before the algorithms stabilize,
which would require multiple requests to be queued anyway (a single
snapshot use case could be implemented with a helper on top of
libcamera), but if an application disables all algorithms and provides
manual values for all controls, it could be feasible to capture a proper
snapshot with a single buffer and a single request.

Naush, David, your input would be welcome here, both on the explanation
of the issue (are my understanding and explanation correct ?), and on
the questions that follow.

Nícolas, don't let this scare you, you don't have to solve this issue as
part of this patch series :-) We can merge it as a first step, and
continue working on top. I would however like this patch to define the
QueueDepth property a bit better. Based on the above, you can decide
what you want to represent with QueueDepth, and document and name it
accordingly. If you're not sure, we can of course discuss it.

> +
>    # ----------------------------------------------------------------------------
>    # Draft properties section
>
Naushir Patuck April 26, 2021, 10:38 a.m. UTC | #3
Hi everyone,

On Mon, 26 Apr 2021 at 03:40, Laurent Pinchart <
laurent.pinchart@ideasonboard.com> wrote:

> Hi Nícolas,
>
> Thank you for the patch.
>
> On Wed, Apr 21, 2021 at 01:51:36PM -0300, Nícolas F. R. A. Prado wrote:
> > The QueueDepth property reports the minimum amount of requests needed in
> > the camera pipeline.
> >
> > Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> > ---
> >  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
> >  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
> >  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
> >  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
> >  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
> >  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
> >  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
> >  src/libcamera/property_ids.yaml                    | 5 +++++
> >  8 files changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp
> b/src/libcamera/pipeline/ipu3/ipu3.cpp
> > index 51446fcf5bc1..6067db2f37a3 100644
> > --- a/src/libcamera/pipeline/ipu3/ipu3.cpp
> > +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
> > @@ -1049,6 +1049,10 @@ int PipelineHandlerIPU3::registerCameras()
> >               /* Initialize the camera properties. */
> >               data->properties_ = cio2->sensor()->properties();
> >
> > +             /* TODO This can be changed to CIO2 after configuration,
> but
> > +              * both are 4 currently */
>
>                 /*
>                  * \todo This can be changed to CIO2 after configuration,
> but
>                  * both are 4 currently
>                  */
>
> The \todo is doxygen style and is used through the code base. The
> placement of /* */ is our standard coding style. Same below.
>
> > +
> > +
> > +             data->properties_.set(properties::QueueDepth, 4);
>
> I think you can already lower the value, to 1 or 2 depending on the
> definition of the property (see below).
>
> > +
> >               ret = initControls(data.get());
> >               if (ret)
> >                       continue;
> > diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > index 2a917455500f..8d1ade3a4352 100644
> > --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > @@ -1035,6 +1035,10 @@ bool PipelineHandlerRPi::match(DeviceEnumerator
> *enumerator)
> >       /* Initialize the camera properties. */
> >       data->properties_ = data->sensor_->properties();
> >
> > +     /* TODO Can be 1, 2 or 4 depending on configuration, for now use
> the max
> > +      * which is 4 */
> > +     data->properties_.set(properties::QueueDepth, 4);
>
> Same here, Naush commented that this should be 1, but depending on how
> we define the control, 2 could be a better value.
>
> > +
> >       /*
> >        * Set a default value for the ScalerCropMaximum property to show
> >        * that we support its use, however, initialise it to zero because
> > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > index 549f4a4e61a8..7d876e9387d7 100644
> > --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > @@ -21,6 +21,7 @@
> >  #include <libcamera/ipa/core_ipa_interface.h>
> >  #include <libcamera/ipa/rkisp1_ipa_interface.h>
> >  #include <libcamera/ipa/rkisp1_ipa_proxy.h>
> > +#include <libcamera/property_ids.h>
> >  #include <libcamera/request.h>
> >  #include <libcamera/stream.h>
> >
> > @@ -940,6 +941,7 @@ int PipelineHandlerRkISP1::createCamera(MediaEntity
> *sensor)
> >
> >       /* Initialize the camera properties. */
> >       data->properties_ = data->sensor_->properties();
> > +     data->properties_.set(properties::QueueDepth, RKISP1_BUFFER_COUNT);
>
> Here 3 would be a better value, to match the driver. However, I wonder
> if the hardware really needs 3 buffers. Dafna, Helen, do you have
> information about this ? A quick glance at the driver makes me think we
> could lower the number of buffers to 2. Lowering it to 1 may be nice,
> but probably more tricky.
>
> >
> >       /*
> >        * \todo Read dealy values from the sensor itself or from a
> > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > index 3b3e37d258d0..7540dd41ad84 100644
> > --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > @@ -26,6 +26,8 @@ class V4L2Subdevice;
> >  struct StreamConfiguration;
> >  struct V4L2SubdeviceFormat;
> >
> > +static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> > +
> >  class RkISP1Path
> >  {
> >  public:
> > @@ -56,8 +58,6 @@ public:
> >       Signal<FrameBuffer *> &bufferReady() { return video_->bufferReady;
> }
> >
> >  private:
> > -     static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> > -
> >       const char *name_;
> >       bool running_;
> >
> > diff --git a/src/libcamera/pipeline/simple/simple.cpp
> b/src/libcamera/pipeline/simple/simple.cpp
> > index f6095d38e97a..6ee24f2f14e8 100644
> > --- a/src/libcamera/pipeline/simple/simple.cpp
> > +++ b/src/libcamera/pipeline/simple/simple.cpp
> > @@ -22,6 +22,7 @@
> >  #include <linux/media-bus-format.h>
> >
> >  #include <libcamera/camera.h>
> > +#include <libcamera/property_ids.h>
> >  #include <libcamera/request.h>
> >  #include <libcamera/stream.h>
> >
> > @@ -141,6 +142,8 @@ static const SimplePipelineInfo supportedDevices[] =
> {
> >
> >  } /* namespace */
> >
> > +static constexpr unsigned int kNumInternalBuffers = 3;
> > +
> >  class SimpleCameraData : public CameraData
> >  {
> >  public:
> > @@ -238,8 +241,6 @@ protected:
> >       int queueRequestDevice(Camera *camera, Request *request) override;
> >
> >  private:
> > -     static constexpr unsigned int kNumInternalBuffers = 3;
> > -
>
> Is this change needed ?
>
> >       SimpleCameraData *cameraData(const Camera *camera)
> >       {
> >               return static_cast<SimpleCameraData *>(
> > @@ -424,6 +425,7 @@ int SimpleCameraData::init()
> >       }
> >
> >       properties_ = sensor_->properties();
> > +     properties_.set(properties::QueueDepth, kNumInternalBuffers);
> >
> >       return 0;
> >  }
> > diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > index b6c6ade5ebaf..591f46b60d23 100644
> > --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > @@ -525,6 +525,8 @@ int UVCCameraData::init(MediaDevice *media)
> >       properties_.set(properties::PixelArraySize, resolution);
> >       properties_.set(properties::PixelArrayActiveAreas, {
> Rectangle(resolution) });
> >
> > +     properties_.set(properties::QueueDepth, 4);
>
> You can set this to 1 or 2 (again, see below).
>
> > +
> >       /* Initialise the supported controls. */
> >       ControlInfoMap::Map ctrls;
> >
> > diff --git a/src/libcamera/pipeline/vimc/vimc.cpp
> b/src/libcamera/pipeline/vimc/vimc.cpp
> > index 8f5f4ba30953..605b3fe89152 100644
> > --- a/src/libcamera/pipeline/vimc/vimc.cpp
> > +++ b/src/libcamera/pipeline/vimc/vimc.cpp
> > @@ -20,6 +20,7 @@
> >  #include <libcamera/formats.h>
> >  #include <libcamera/ipa/ipa_interface.h>
> >  #include <libcamera/ipa/ipa_module_info.h>
> > +#include <libcamera/property_ids.h>
> >  #include <libcamera/request.h>
> >  #include <libcamera/stream.h>
> >
> > @@ -516,6 +517,8 @@ int VimcCameraData::init()
> >       /* Initialize the camera properties. */
> >       properties_ = sensor_->properties();
> >
> > +     properties_.set(properties::QueueDepth, 4);
>
> And here, 2 should be enough.
>
> > +
> >       return 0;
> >  }
> >
> > diff --git a/src/libcamera/property_ids.yaml
> b/src/libcamera/property_ids.yaml
> > index 104e9aaf4fa3..0b7d1271a26b 100644
> > --- a/src/libcamera/property_ids.yaml
> > +++ b/src/libcamera/property_ids.yaml
> > @@ -678,6 +678,11 @@ controls:
> >          \todo Turn this property into a "maximum control value" for the
> >          ScalerCrop control once "dynamic" controls have been
> implemented.
> >
> > +  - QueueDepth:
> > +      type: int32_t
> > +      description: |
> > +        Minimum amount of requests needed in the camera pipeline.
>
> I think this needs to be better defined.
>
> First of all, the name QueueDepth doesn't really relate to concepts that
> are explicitly defined in libcamera. We should thus either define those
> concepts, or, if we want to define the property in a self-contained way,
> change the name to match the description of the property. This could for
> instance become MinNumRequests.
>
> Then, we should also expand the definition of the control. This requires
> a bit of design work. What do we want to convey here ? Let's start the
> brainstorming session, which means you can challenge anything I'll
> propose here, and present any new idea.
>
> There are multiple reasons why we need a minimum number of requests
> queued:
>
> - Fulfilling driver requirements, which are related to hardware
>   requirements.
>
>   It is common for video capture hardware to require multiple buffers to
>   be queued to the hardware. A very common situation (mostly seen in
>   low-end hardware) is to have two buffer address registers for the
>   capture DMA engine, and ping-pong between the two of them. The
>   hardware in that case can't deal with buffer underruns, so the driver
>   needs to supply two buffers before capture can be started. The driver
>   is also limited by the hardware when it comes to buffer completion.
>   When a frame capture complete, a new buffer is needed to replace the
>   completed one. Drivers usually hold on the completed buffer if no new
>   buffer is available, so that the DMA engine will always have two
>   buffers available. This means that queueing two buffers would start
>   the capture, and a third buffer is needed for the driver to send
>   completed buffers back to userspace. A similar situation occurs when
>   the hardware uses shadow registers instead of an explicit ping-pong
>   mechanism.
>
>   There are several things that drivers can (and should) do to minimize
>   the impact on userspace. One of them would be to allocate a scratch
>   buffer to be used when userspace doesn't provide buffers fast enough.
>   This is costly in terms of memory. Userspace is in a better position
>   to handle scratch buffers, and skip them if it can guarantee that
>   there will be no buffer queue underrun. However, when the device is
>   behind an IOMMU, a single physical page can be allocated and mapped to
>   the device as a large DMA area that spans a full frame, which makes
>   scratch buffers cheap if handled by the kernel.
>
>   Another contingency measure is possible in the ping-pong case. If the
>   hardware requires buffers A and B to be available, capture can start
>   immediately when two buffers (let's call them 1 and 2) are queued.
>   When capture to A completes, the driver needs to reprogram the address
>   of buffer A. If a third buffer is available in the queue at that time,
>   it will use it.  Otherwise, it could reuse buffer 2 to program address
>   A. This means that, when capture to B completes, that frame will be
>   dropped, as the buffer will be reused immediately for A. This makes it
>   possible to dequeue buffers with only two buffers queued instead of
>   three. Using less than three buffers would however still result in
>   frames being dropped.
>
>   Note that these constraints should be applicable to inline hardware
>   only. For offline ISPs (memory-to-memory), the hardware shouldn't have
>   such constraints, and should be fine with starting processing as soon
>   as one buffer is available, and to stop processing until a new buffer
>   is available in case of a buffer queue underrun. That's at least the
>   case for sane hardware, and we may not be able to completely exclude
>   insanity on the hardware side.
>
> - Peeking ahead in the request parameters.
>
>   It can take time for some parameters to be applied. For instance, it's
>   quite typical for the camera sensor's exposure time and gain to
>   require 2 and 1 frames to be applied. This means that, to guarantee
>   proper function of per-frame control, we need to have parameters
>   available in advance. When running with auto-exposure enabled this may
>   be less of a problem for the exposure time and gain, but if the
>   application wants to control those parameters manually, we need enough
>   requests in the queue.
>
>   Generally speaking, a camera pipeline can be composed of multiple
>   processing stages. A typical case has a camera sensor connected to an
>   inline ISP (or just a CSI-2 receiver) to capture raw images, followed
>   by an offline (memory-to-memory) ISP to process raw frames and output
>   YUV. The result can then be encoded to JPEG by a memory-to-memory
>   encoder (this is only relevant for the Android camera HAL
>   implementation, the libcamera core doesn't deal with JPEG encoding).
>   While frame N is being encoded to JPEG, frame N+1 is being processed
>   by the offline ISP, frame N+2 is being read out from the sensor (and
>   possibly processed by the inline ISP), and frame N+3 may be exposed.
>   Even if the hardware doesn't require more than one buffer to be queued
>   to function, if request N contains parameters for the camera sensor,
>   for the offline ISP, and for the JPEG encoder, those parameters need
>   to be available to libcamera three frames ealier to be applied at the
>   right time.
>
>   Note that some processing steps could in theory take more than the
>   time of one frame to complete, if they're pipelined at the hardware
>   level, or if they can be run in parallel for multiple frames (for
>   instance we could have two JPEG encoders that take the time of one
>   frame and a half to encode a frame). I'm not sure if this would impact
>   what we need to expose to applications.
>
> The question is how to translate this to requirements exposed to
> applications. Going back to the IPU3 and RPi pipeline handlers, both
> have the same hardware architecture, with a CSI-2 receiver capturing to
> memory, and an offline ISP. The hardware and drivers will operate with a
> single buffer, but the inline part will drop frames if we don't have at
> least two buffers for raw frames (which may be internal buffers if the
> application only captures YUV streams, or application-provided buffers
> if the application also captures a RAW stream). A minimum of 1 request
> will be able to capture frames in suboptimal conditions. 2 requests
> would be needed to avoid dropping frames, and 3 requests to ensure
> manual exposure time can be applied early enough (a 4th request would
> then be needed to handle the JPEG encoding, but as that's for the
> Android camera HAL implementation only, the libcamera core doesn't need
> to consider it). Note that, if IPA algorithms are available, we could
> possibly get away with less requests as the IPA could compute parameters
> earlier, but we'd be screwed if a request disables algorithms, which we
> can't really predict.
>
> Let's also note that this doesn't mean that we need to have 3 or 4
> buffers available, the buffers are only needed on the output side of the
> pipeline, later than the parameters, but we currently have no way to
> decouple buffers and controls.
>
> What should we report in such a case ? I think we need to provide enough
> information for applications to implement the optimal case (no frame
> dropped, controls applied on the right frame, ...). We probably also
> want to let applications know what the bare minimum is in order to
> capture frames in suboptimal conditions. For instance, an application
> may want to capture frames with a very low frame rate, or even capture a
> single snapshot, and it would probably be overkill to require queuing
> more than one request in that case. That leaves open the question of how
> to deal with drivers that need more than one buffer to operate (the
> non-IOMMU case above). Do we want to support them or can we draw the
> line there (I think we should support them, unfortunately) ? Do we want
> to allocate scratch buffers in the pipeline handler to make this
> completely transparent to the application, which would result in a waste
> of memory if the application provides us with enough buffers at all
> times (and would also give us "interesting" problems to solve, such as
> deciding when to queue a scratch buffer at start time) ? In many cases
> the IPA needs to run for a few frames before the algorithms stabilize,
> which would require multiple requests to be queued anyway (a single
> snapshot use case could be implemented with a helper on top of
> libcamera), but if an application disables all algorithms and provides
> manual values for all controls, it could be feasible to capture a proper
> snapshot with a single buffer and a single request.
>
> Naush, David, your input would be welcome here, both on the explanation
> of the issue (are my understanding and explanation correct ?), and on
> the questions that follow.
>

I think Laurent's explanation provides a comprehensive summary of what
this control might need to encompass.  To provide a bit more context, for
Raspberry Pi, our CSI2 device driver (Unicam) does use a scratch buffer
if a framebuffer is not provided by the application.  This scratch buffer is
16k (single page) in size, as our hardware allows us to truncate or run in
circular buffer mode.  I intentionally left allocation and control of the
scratch buffer within the kernel driver as buffer handling is driven by the device
interrupt handler for timing guarantees.  Given that Unicam writes out to
memory, and our ISP is purely a memory to memory device, I have suggested
our minimum queue depth be set to 1 for guaranteed operation.

Having said that, there is an interesting point raised by Laurent about manual
exposure and gain controls.  I believe we have always said that these
controls don't need to be consumed by the buffer returned in the Request
that set them (i.e. the gain/exposure updates can return N frames later),
is that still correct?  Of course, setting manual exposure/gain before
starting streaming would (should!) guarantee the buffer returned in the
Request will have the correct values set.

In my opinion, I don't see much value in tying this QueueDepth property with
sensor delays - which can and will be different per sensor.  If an application
has requested manual exposure/gain values *while in a streaming state*
it might just have to scan the metadata on each returned framebuffer until
it has the frame with the values it requested.  This can be easily done in
its request/return loop without needing to know the frame delays, so no
need to adjust QueueDepth for this.
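
To illustrate, something along these lines in the application's completion
handler would be enough (just a sketch; it assumes the pipeline handler
reports ExposureTime in the per-request metadata, and the requested value and
the requeue handling here are placeholders):

	#include <libcamera/control_ids.h>
	#include <libcamera/controls.h>
	#include <libcamera/request.h>

	using namespace libcamera;

	/* Manual value the application asked for (microseconds, made up here). */
	static constexpr int32_t kRequestedExposure = 10000;

	static void requestComplete(Request *request)
	{
		const ControlList &metadata = request->metadata();

		/* Scan the per-frame metadata until the sensor has caught up. */
		int32_t applied = metadata.get(controls::ExposureTime);
		if (applied == kRequestedExposure) {
			/* First frame carrying the requested exposure, use it. */
		}

		/* Reuse and requeue the request as usual... */
	}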

Regards,
Naush



>
> Nícolas, don't let this scare you, you don't have to solve this issue as
> part of this patch series :-) We can merge it as a first step, and
> continue working on top. I would however like this patch to define the
> QueueDepth property a bit better. Based on the above, you can decide
> what you want to represent with QueueDepth, and document and name it
> accordingly. If you're not sure, we can of course discuss it.
>
> > +
> >    #
> ----------------------------------------------------------------------------
> >    # Draft properties section
> >
>
> --
> Regards,
>
> Laurent Pinchart
>
Nícolas F. R. A. Prado April 26, 2021, 4:36 p.m. UTC | #4
Hi,

thank you both for the feedback.

On 2021-04-26 07:38, Naushir Patuck wrote:

> Hi everyone,
>
> On Mon, 26 Apr 2021 at 03:40, Laurent Pinchart <
> laurent.pinchart@ideasonboard.com> wrote:
>
> > Hi Nícolas,
> >
> > Thank you for the patch.
> >
> > On Wed, Apr 21, 2021 at 01:51:36PM -0300, Nícolas F. R. A. Prado wrote:
> > > The QueueDepth property reports the minimum amount of requests needed in
> > > the camera pipeline.
> > >
> > > Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> > > ---
> > >  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
> > >  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
> > >  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
> > >  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
> > >  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
> > >  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
> > >  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
> > >  src/libcamera/property_ids.yaml                    | 5 +++++
> > >  8 files changed, 26 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp
> > b/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > index 51446fcf5bc1..6067db2f37a3 100644
> > > --- a/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > @@ -1049,6 +1049,10 @@ int PipelineHandlerIPU3::registerCameras()
> > >               /* Initialize the camera properties. */
> > >               data->properties_ = cio2->sensor()->properties();
> > >
> > > +             /* TODO This can be changed to CIO2 after configuration,
> > but
> > > +              * both are 4 currently */
> >
> >                 /*
> >                  * \todo This can be changed to CIO2 after configuration,
> > but
> >                  * both are 4 currently
> >                  */
> >
> > The \todo is doxygen style and is used through the code base. The
> > placement of /* */ is our standard coding style. Same below.
> >
> > > +
> > > +
> > > +             data->properties_.set(properties::QueueDepth, 4);
> >
> > I think you can already lower the value, to 1 or 2 depending on the
> > definition of the property (see below).
> >
> > > +
> > >               ret = initControls(data.get());
> > >               if (ret)
> > >                       continue;
> > > diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > index 2a917455500f..8d1ade3a4352 100644
> > > --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > @@ -1035,6 +1035,10 @@ bool PipelineHandlerRPi::match(DeviceEnumerator
> > *enumerator)
> > >       /* Initialize the camera properties. */
> > >       data->properties_ = data->sensor_->properties();
> > >
> > > +     /* TODO Can be 1, 2 or 4 depending on configuration, for now use
> > the max
> > > +      * which is 4 */
> > > +     data->properties_.set(properties::QueueDepth, 4);
> >
> > Same here, Naush commented that this should be 1, but depending on how
> > we define the control, 2 could be a better value.
> >
> > > +
> > >       /*
> > >        * Set a default value for the ScalerCropMaximum property to show
> > >        * that we support its use, however, initialise it to zero because
> > > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > index 549f4a4e61a8..7d876e9387d7 100644
> > > --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > @@ -21,6 +21,7 @@
> > >  #include <libcamera/ipa/core_ipa_interface.h>
> > >  #include <libcamera/ipa/rkisp1_ipa_interface.h>
> > >  #include <libcamera/ipa/rkisp1_ipa_proxy.h>
> > > +#include <libcamera/property_ids.h>
> > >  #include <libcamera/request.h>
> > >  #include <libcamera/stream.h>
> > >
> > > @@ -940,6 +941,7 @@ int PipelineHandlerRkISP1::createCamera(MediaEntity
> > *sensor)
> > >
> > >       /* Initialize the camera properties. */
> > >       data->properties_ = data->sensor_->properties();
> > > +     data->properties_.set(properties::QueueDepth, RKISP1_BUFFER_COUNT);
> >
> > Here 3 would be a better value, to match the driver. However, I wonder
> > if the hardware really needs 3 buffers. Dafna, Helen, do you have
> > information about this ? A quick glance at the driver makes me think we
> > could lower the number of buffers to 2. Lowering it to 1 may be nice,
> > but probably more tricky.
> >
> > >
> > >       /*
> > >        * \todo Read dealy values from the sensor itself or from a
> > > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > index 3b3e37d258d0..7540dd41ad84 100644
> > > --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > @@ -26,6 +26,8 @@ class V4L2Subdevice;
> > >  struct StreamConfiguration;
> > >  struct V4L2SubdeviceFormat;
> > >
> > > +static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> > > +
> > >  class RkISP1Path
> > >  {
> > >  public:
> > > @@ -56,8 +58,6 @@ public:
> > >       Signal<FrameBuffer *> &bufferReady() { return video_->bufferReady;
> > }
> > >
> > >  private:
> > > -     static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> > > -
> > >       const char *name_;
> > >       bool running_;
> > >
> > > diff --git a/src/libcamera/pipeline/simple/simple.cpp
> > b/src/libcamera/pipeline/simple/simple.cpp
> > > index f6095d38e97a..6ee24f2f14e8 100644
> > > --- a/src/libcamera/pipeline/simple/simple.cpp
> > > +++ b/src/libcamera/pipeline/simple/simple.cpp
> > > @@ -22,6 +22,7 @@
> > >  #include <linux/media-bus-format.h>
> > >
> > >  #include <libcamera/camera.h>
> > > +#include <libcamera/property_ids.h>
> > >  #include <libcamera/request.h>
> > >  #include <libcamera/stream.h>
> > >
> > > @@ -141,6 +142,8 @@ static const SimplePipelineInfo supportedDevices[] =
> > {
> > >
> > >  } /* namespace */
> > >
> > > +static constexpr unsigned int kNumInternalBuffers = 3;
> > > +
> > >  class SimpleCameraData : public CameraData
> > >  {
> > >  public:
> > > @@ -238,8 +241,6 @@ protected:
> > >       int queueRequestDevice(Camera *camera, Request *request) override;
> > >
> > >  private:
> > > -     static constexpr unsigned int kNumInternalBuffers = 3;
> > > -
> >
> > Is this change needed ?
> >
> > >       SimpleCameraData *cameraData(const Camera *camera)
> > >       {
> > >               return static_cast<SimpleCameraData *>(
> > > @@ -424,6 +425,7 @@ int SimpleCameraData::init()
> > >       }
> > >
> > >       properties_ = sensor_->properties();
> > > +     properties_.set(properties::QueueDepth, kNumInternalBuffers);
> > >
> > >       return 0;
> > >  }
> > > diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > index b6c6ade5ebaf..591f46b60d23 100644
> > > --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > @@ -525,6 +525,8 @@ int UVCCameraData::init(MediaDevice *media)
> > >       properties_.set(properties::PixelArraySize, resolution);
> > >       properties_.set(properties::PixelArrayActiveAreas, {
> > Rectangle(resolution) });
> > >
> > > +     properties_.set(properties::QueueDepth, 4);
> >
> > You can set this to 1 or 2 (again, see below).
> >
> > > +
> > >       /* Initialise the supported controls. */
> > >       ControlInfoMap::Map ctrls;
> > >
> > > diff --git a/src/libcamera/pipeline/vimc/vimc.cpp
> > b/src/libcamera/pipeline/vimc/vimc.cpp
> > > index 8f5f4ba30953..605b3fe89152 100644
> > > --- a/src/libcamera/pipeline/vimc/vimc.cpp
> > > +++ b/src/libcamera/pipeline/vimc/vimc.cpp
> > > @@ -20,6 +20,7 @@
> > >  #include <libcamera/formats.h>
> > >  #include <libcamera/ipa/ipa_interface.h>
> > >  #include <libcamera/ipa/ipa_module_info.h>
> > > +#include <libcamera/property_ids.h>
> > >  #include <libcamera/request.h>
> > >  #include <libcamera/stream.h>
> > >
> > > @@ -516,6 +517,8 @@ int VimcCameraData::init()
> > >       /* Initialize the camera properties. */
> > >       properties_ = sensor_->properties();
> > >
> > > +     properties_.set(properties::QueueDepth, 4);
> >
> > And here, 2 should be enough.
> >
> > > +
> > >       return 0;
> > >  }
> > >
> > > diff --git a/src/libcamera/property_ids.yaml
> > b/src/libcamera/property_ids.yaml
> > > index 104e9aaf4fa3..0b7d1271a26b 100644
> > > --- a/src/libcamera/property_ids.yaml
> > > +++ b/src/libcamera/property_ids.yaml
> > > @@ -678,6 +678,11 @@ controls:
> > >          \todo Turn this property into a "maximum control value" for the
> > >          ScalerCrop control once "dynamic" controls have been
> > implemented.
> > >
> > > +  - QueueDepth:
> > > +      type: int32_t
> > > +      description: |
> > > +        Minimum amount of requests needed in the camera pipeline.
> >
> > I think this needs to be better defined.
> >
> > First of all, the name QueueDepth doesn't really relate to concepts that
> > are explicitly defined in libcamera. We should thus either define those
> > concepts, or, if we want to define the property in a self-contained way,
> > change the name to match the description of the property. This could for
> > instance become MinNumRequests.
> >
> > Then, we should also expand the definition of the control. This requires
> > a bit of design work. What do we want to convey here ? Let's start the
> > brainstorming session, which means you can challenge anything I'll
> > propose here, and present any new idea.
> >
> > There are multiple reasons why we need a minimum number of requests
> > queued:
> >
> > - Fulfilling driver requirements, which are related to hardware
> >   requirements.
> >
> >   It is common for video capture hardware to require multiple buffers to
> >   be queued to the hardware. A very common situation (mostly seen in
> >   low-end hardware) is to have two buffer address registers for the
> >   capture DMA engine, and ping-pong between the two of them. The
> >   hardware in that case can't deal with buffer underruns, so the driver
> >   needs to supply two buffers before capture can be started. The driver
> >   is also limited by the hardware when it comes to buffer completion.
> >   When a frame capture complete, a new buffer is needed to replace the
> >   completed one. Drivers usually hold on the completed buffer if no new
> >   buffer is available, so that the DMA engine will always have two
> >   buffers available. This means that queueing two buffers would start
> >   the capture, and a third buffer is needed for the driver to send
> >   completed buffers back to userspace. A similar situation occurs when
> >   the hardware uses shadow registers instead of an explicit ping-pong
> >   mechanism.
> >
> >   There are several things that drivers can (and should) do to minimize
> >   the impact on userspace. One of them would be to allocate a scratch
> >   buffer to be used when userspace doesn't provide buffers fast enough.
> >   This is costly in terms of memory. Userspace is in a better position
> >   to handle scratch buffers, and skip them if it can guarantee that
> >   there will be no buffer queue underrun. However, when the device is
> >   behind an IOMMU, a single physical page can be allocated and mapped to
> >   the device as a large DMA area that spans a full frame, which makes
> >   scratch buffers cheap if handled by the kernel.
> >
> >   Another contingency measure is possible in the ping-pong case. If the
> >   hardware requires buffers A and B to be available, capture can start
> >   immediately when two buffers (let's call them 1 and 2) are queued.
> >   When capture to A completes, the driver needs to reprogram the address
> >   of buffer A. If a third buffer is available in the queue at that time,
> >   it will use it.  Otherwise, it could reuse buffer 2 to program address
> >   A. This means that, when capture to B completes, that frame will be
> >   dropped, as the buffer will be reused immediately for A. This makes it
> >   possible to dequeue buffers with only two buffers queued instead of
> >   three. Using less than three buffers would however still result in
> >   frames being dropped.
> >
> >   Note that these constraints should be applicable to inline hardware
> >   only. For offline ISPs (memory-to-memory), the hardware shouldn't have
> >   such constraints, and should be fine with starting processing as soon
> >   as one buffer is available, and to stop processing until a new buffer
> >   is available in case of a buffer queue underrun. That's at least the
> >   case for sane hardware, and we may not be able to completely exclude
> >   insanity on the hardware side.
> >
> > - Peeking ahead in the request parameters.
> >
> >   It can take time for some parameters to be applied. For instance, it's
> >   quite typical for the camera sensor's exposure time and gain to
> >   require 2 and 1 frames to be applied. This means that, to guarantee
> >   proper function of per-frame control, we need to have parameters
> >   available in advance. When running with auto-exposure enabled this may
> >   be less of a problem for the exposure time and gain, but if the
> >   application wants to control those parameters manually, we need enough
> >   requests in the queue.
> >
> >   Generally speaking, a camera pipeline can be composed of multiple
> >   processing stages. A typical case has a camera sensor connected to an
> >   inline ISP (or just a CSI-2 receiver) to capture raw images, followed
> >   by an offline (memory-to-memory) ISP to process raw frames and output
> >   YUV. The result can then be encoded to JPEG by a memory-to-memory
> >   encoder (this is only relevant for the Android camera HAL
> >   implementation, the libcamera core doesn't deal with JPEG encoding).
> >   While frame N is being encoded to JPEG, frame N+1 is being processed
> >   by the offline ISP, frame N+2 is being read out from the sensor (and
> >   possibly processed by the inline ISP), and frame N+3 may be exposed.
> >   Even if the hardware doesn't require more than one buffer to be queued
> >   to function, if request N contains parameters for the camera sensor,
> >   for the offline ISP, and for the JPEG encoder, those parameters need
> >   to be available to libcamera three frames ealier to be applied at the
> >   right time.
> >
> >   Note that some processing steps could in theory take more than the
> >   time of one frame to complete, if they're pipelined at the hardware
> >   level, or if they can be run in parallel for multiple frames (for
> >   instance we could have two JPEG encoders that take the time of one
> >   frame and a half to encode a frame). I'm not sure if this would impact
> >   what we need to expose to applications.
> >
> > The question is how to translate this to requirements exposed to
> > applications. Going back to the IPU3 and RPi pipeline handlers, both
> > have the same hardware architecture, with a CSI-2 receiver capturing to
> > memory, and an offline ISP. The hardware and drivers will operate with a
> > single buffer, but the inline part will drop frames if we don't have at
> > least two buffers for raw frames (which may be internal buffers if the
> > application only captures YUV streams, or application-provided buffers
> > if the application also captures a RAW stream). A minimum of 1 request
> > will be able to capture frames in suboptimal conditions. 2 requests
> > would be needed to avoid dropping frames, and 3 requests to ensure
> > manual exposure time can be applied early enough (a 4th request would
> > then be needed to handle the JPEG encoding, but as that's for the
> > Android camera HAL implementation only, the libcamera core doesn't need
> > to consider it). Note that, if IPA algorithms are available, we could
> > possibly get away with less requests as the IPA could compute parameters
> > earlier, but we'd be screwed if a request disables algorithms, which we
> > can't really predict.
> >
> > Let's also note that this doesn't mean that we need to have 3 or 4
> > buffers available, the buffers are only needed on the output side of the
> > pipeline, later than the parameters, but we currently have no way to
> > decouple buffers and controls.
> >
> > What should we report in such a case ? I think we need to provide enough
> > information for applications to implement the optimal case (no frame
> > dropped, controls applied on the right frame, ...). We probably also
> > want to let applications know what the bare minimum is in order to
> > capture frames in suboptimal conditions. For instance, an application
> > may want to capture frames with a very low frame rate, or even capture a
> > single snapshot, and it would probably be overkill to require queuing
> > more than one request in that case. That leaves open the question of how
> > to deal with drivers that need more than one buffer to operate (the
> > non-IOMMU case above). Do we want to support them or can we draw the
> > line there (I think we should support them, unfortunately) ? Do we want
> > to allocate scratch buffers in the pipeline handler to make this
> > completely transparent to the application, which would result in a waste
> > of memory if the application provides us with enough buffers at all
> > times (and would also give us "interesting" problems to solve, such as
> > deciding when to queue a scratch buffer at start time) ? In many cases
> > the IPA needs to run for a few frames before the algorithms stabilize,
> > which would require multiple requests to be queued anyway (a single
> > snapshot use case could be implemented with a helper on top of
> > libcamera), but if an application disables all algorithms and provides
> > manual values for all controls, it could be feasible to capture a proper
> > snapshot with a single buffer and a single request.
> >
> > Naush, David, your input would be welcome here, both on the explanation
> > of the issue (are my understanding and explanation correct ?), and on
> > the questions that follow.
> >
>
> I think Laurent's explanation provides a comprehensive summary of what
> this control might need to encompass. To provide a bit more context, for
> Raspberry Pi, our CSI2 device driver (Unicam) does use a scratch buffer
> if a framebuffer is not provided by the application. This scratch buffer is
> 16k (single page) in size, as our hardware allows us to truncate or run in
> circular buffer mode. I intentionally left allocation and control of the
> scratch
> buffer within the kernel driver as buffer handling is driven by the device
> interrupt handler for timing guarantees. Given that Unicam writes out to
> memory, and our ISP is purely a memory to memory device, I have suggested
> our minimum queue depth be set to 1 for guaranteed operation.
>
> Having said that, there is an interesting point raised by Laurent about
> manual
> exposure and gain controls. I believe we have always said that these
> controls don't need to be consumed by the buffer returned in the Request
> that set them (i.e. the gain/exposure updates can return N frames later),
> is that still correct? Of course, setting manual exposure/gain before
> starting
> streaming would (should!) guarantee the buffer returned in the Request
> will have the correct values set.
>
> In my opinion, I don't see much value in tying this QueueDepth property with
> sensor delays - which can and will be different per sensor. If an
> application
> has requested manual exposure/gain values *while in a streaming state*
> it might just have to scan the metadata on each returned framebuffer until
> it has the frame with the values it requested. This can be easily done in
> it's request/return loop without needing to know the frame delays, so no
> need to adjust QueueDepth for this.

This seems like it needs more discussion, and those of you already involved in
libcamera understand the implications better than I do :).

My rationale is: with this series I added the flexibility to choose the number
of buffers to be allocated, but I wanted the code that doesn't care about that
to keep working, by allocating a number of buffers that just works (like the
current lc-compliance tests, the internal tests, cam and qcam).

So, based on your comments, how about I rename the property to
RecommendedNumRequests? This would be the number of requests recommended for
everything to work well (both for the hardware pipeline and for manually
setting controls on a per-frame basis). More discussion is needed to determine
whether sensor delays should be accounted for, but if they are, another
property, MinNumRequests, could be added for the real minimum number needed,
with no guarantee that controls are applied in time or that no frames are
dropped.
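
On the application side, I imagine code that doesn't care (lc-compliance, cam,
qcam) would just read the property. A rough sketch of what I mean (untested,
assuming the usual 'using namespace libcamera'; RecommendedNumRequests is only
the name proposed above and the fallback value is arbitrary):

  /* Hypothetical application-side use of the proposed property. */
  const ControlList &props = camera->properties();

  unsigned int numRequests = 4; /* arbitrary fallback if the property is absent */
  if (props.contains(properties::RecommendedNumRequests))
          numRequests = props.get(properties::RecommendedNumRequests);

  std::vector<std::unique_ptr<Request>> requests;
  for (unsigned int i = 0; i < numRequests; ++i)
          requests.push_back(camera->createRequest());
  /* ... attach one buffer per stream to each request and queue them ... */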

Regarding the sensor delay, I wonder about the possibility of the pipeline
handlers looking up the sensor delay based on the Model property (possibly in a
"database" that can be reused by all pipeline handlers). That way, during setup
the pipeline handler would export its MinNumRequests property based on a
hardcoded value, since it knows the hardware requirements, and then compute
MinNumRequests + SensorDelay (or max() them? I'm not sure) to get
RecommendedNumRequests and export that as well.
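
On the pipeline handler side that would look roughly like this (only a sketch:
the property names are the ones proposed above, the value of 1 is a
placeholder, and sensorDelayFromModel() is a hypothetical helper for the
"database" lookup):

  /* Hypothetical pipeline handler code, run once during setup. */
  static constexpr int32_t kHwMinRequests = 1; /* what this hardware/driver needs */

  int32_t delay = sensorDelayFromModel(data->properties_.get(properties::Model));

  data->properties_.set(properties::MinNumRequests, kHwMinRequests);
  /* Add or max()? That's the open question above; addition is shown here. */
  data->properties_.set(properties::RecommendedNumRequests,
                        kHwMinRequests + delay);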

Thanks,
Nícolas

>
> Regards,
> Naush
>
>
>
> >
> > Nícolas, don't let this scare you, you don't have to solve this issue as
> > part of this patch series :-) We can merge it as a first step, and
> > continue working on top. I would however like this patch to define the
> > QueueDepth property a bit better. Based on the above, you can decide
> > what you want to represent with QueueDepth, and document and name it
> > accordingly. If you're not sure, we can of course discuss it.
> >
> > > +
> > >    #
> > ----------------------------------------------------------------------------
> > >    # Draft properties section
> > >
> >
> > --
> > Regards,
> >
> > Laurent Pinchart
> >
Naushir Patuck April 26, 2021, 6:35 p.m. UTC | #5
Hi Nicolas,

On Mon, 26 Apr 2021 at 18:13, Nícolas F. R. A. Prado <
nfraprado@collabora.com> wrote:

> Hi,
>
> thank you both for the feedback.
>
> Em 2021-04-26 07:38, Naushir Patuck escreveu:
>
> > Hi everyone,
> >
> > On Mon, 26 Apr 2021 at 03:40, Laurent Pinchart <
> > laurent.pinchart@ideasonboard.com> wrote:
> >
> > > Hi Nícolas,
> > >
> > > Thank you for the patch.
> > >
> > > On Wed, Apr 21, 2021 at 01:51:36PM -0300, Nícolas F. R. A. Prado wrote:
> > > > The QueueDepth property reports the minimum amount of requests
> needed in
> > > > the camera pipeline.
> > > >
> > > > Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> > > > ---
> > > >  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
> > > >  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
> > > >  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
> > > >  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
> > > >  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
> > > >  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
> > > >  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
> > > >  src/libcamera/property_ids.yaml                    | 5 +++++
> > > >  8 files changed, 26 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > b/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > > index 51446fcf5bc1..6067db2f37a3 100644
> > > > --- a/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > > +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
> > > > @@ -1049,6 +1049,10 @@ int PipelineHandlerIPU3::registerCameras()
> > > >               /* Initialize the camera properties. */
> > > >               data->properties_ = cio2->sensor()->properties();
> > > >
> > > > +             /* TODO This can be changed to CIO2 after
> configuration,
> > > but
> > > > +              * both are 4 currently */
> > >
> > >                 /*
> > >                  * \todo This can be changed to CIO2 after
> configuration,
> > > but
> > >                  * both are 4 currently
> > >                  */
> > >
> > > The \todo is doxygen style and is used through the code base. The
> > > placement of /* */ is our standard coding style. Same below.
> > >
> > > > +
> > > > +
> > > > +             data->properties_.set(properties::QueueDepth, 4);
> > >
> > > I think you can already lower the value, to 1 or 2 depending on the
> > > definition of the property (see below).
> > >
> > > > +
> > > >               ret = initControls(data.get());
> > > >               if (ret)
> > > >                       continue;
> > > > diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > > index 2a917455500f..8d1ade3a4352 100644
> > > > --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > > +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
> > > > @@ -1035,6 +1035,10 @@ bool
> PipelineHandlerRPi::match(DeviceEnumerator
> > > *enumerator)
> > > >       /* Initialize the camera properties. */
> > > >       data->properties_ = data->sensor_->properties();
> > > >
> > > > +     /* TODO Can be 1, 2 or 4 depending on configuration, for now
> use
> > > the max
> > > > +      * which is 4 */
> > > > +     data->properties_.set(properties::QueueDepth, 4);
> > >
> > > Same here, Naush commented that this should be 1, but depending on how
> > > we define the control, 2 could be a better value.
> > >
> > > > +
> > > >       /*
> > > >        * Set a default value for the ScalerCropMaximum property to
> show
> > > >        * that we support its use, however, initialise it to zero
> because
> > > > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > > index 549f4a4e61a8..7d876e9387d7 100644
> > > > --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > > +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
> > > > @@ -21,6 +21,7 @@
> > > >  #include <libcamera/ipa/core_ipa_interface.h>
> > > >  #include <libcamera/ipa/rkisp1_ipa_interface.h>
> > > >  #include <libcamera/ipa/rkisp1_ipa_proxy.h>
> > > > +#include <libcamera/property_ids.h>
> > > >  #include <libcamera/request.h>
> > > >  #include <libcamera/stream.h>
> > > >
> > > > @@ -940,6 +941,7 @@ int
> PipelineHandlerRkISP1::createCamera(MediaEntity
> > > *sensor)
> > > >
> > > >       /* Initialize the camera properties. */
> > > >       data->properties_ = data->sensor_->properties();
> > > > +     data->properties_.set(properties::QueueDepth,
> RKISP1_BUFFER_COUNT);
> > >
> > > Here 3 would be a better value, to match the driver. However, I wonder
> > > if the hardware really needs 3 buffers. Dafna, Helen, do you have
> > > information about this ? A quick glance at the driver makes me think we
> > > could lower the number of buffers to 2. Lowering it to 1 may be nice,
> > > but probably more tricky.
> > >
> > > >
> > > >       /*
> > > >        * \todo Read dealy values from the sensor itself or from a
> > > > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > > index 3b3e37d258d0..7540dd41ad84 100644
> > > > --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > > +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
> > > > @@ -26,6 +26,8 @@ class V4L2Subdevice;
> > > >  struct StreamConfiguration;
> > > >  struct V4L2SubdeviceFormat;
> > > >
> > > > +static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> > > > +
> > > >  class RkISP1Path
> > > >  {
> > > >  public:
> > > > @@ -56,8 +58,6 @@ public:
> > > >       Signal<FrameBuffer *> &bufferReady() { return
> video_->bufferReady;
> > > }
> > > >
> > > >  private:
> > > > -     static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
> > > > -
> > > >       const char *name_;
> > > >       bool running_;
> > > >
> > > > diff --git a/src/libcamera/pipeline/simple/simple.cpp
> > > b/src/libcamera/pipeline/simple/simple.cpp
> > > > index f6095d38e97a..6ee24f2f14e8 100644
> > > > --- a/src/libcamera/pipeline/simple/simple.cpp
> > > > +++ b/src/libcamera/pipeline/simple/simple.cpp
> > > > @@ -22,6 +22,7 @@
> > > >  #include <linux/media-bus-format.h>
> > > >
> > > >  #include <libcamera/camera.h>
> > > > +#include <libcamera/property_ids.h>
> > > >  #include <libcamera/request.h>
> > > >  #include <libcamera/stream.h>
> > > >
> > > > @@ -141,6 +142,8 @@ static const SimplePipelineInfo
> supportedDevices[] =
> > > {
> > > >
> > > >  } /* namespace */
> > > >
> > > > +static constexpr unsigned int kNumInternalBuffers = 3;
> > > > +
> > > >  class SimpleCameraData : public CameraData
> > > >  {
> > > >  public:
> > > > @@ -238,8 +241,6 @@ protected:
> > > >       int queueRequestDevice(Camera *camera, Request *request)
> override;
> > > >
> > > >  private:
> > > > -     static constexpr unsigned int kNumInternalBuffers = 3;
> > > > -
> > >
> > > Is this change needed ?
> > >
> > > >       SimpleCameraData *cameraData(const Camera *camera)
> > > >       {
> > > >               return static_cast<SimpleCameraData *>(
> > > > @@ -424,6 +425,7 @@ int SimpleCameraData::init()
> > > >       }
> > > >
> > > >       properties_ = sensor_->properties();
> > > > +     properties_.set(properties::QueueDepth, kNumInternalBuffers);
> > > >
> > > >       return 0;
> > > >  }
> > > > diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > > index b6c6ade5ebaf..591f46b60d23 100644
> > > > --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > > +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
> > > > @@ -525,6 +525,8 @@ int UVCCameraData::init(MediaDevice *media)
> > > >       properties_.set(properties::PixelArraySize, resolution);
> > > >       properties_.set(properties::PixelArrayActiveAreas, {
> > > Rectangle(resolution) });
> > > >
> > > > +     properties_.set(properties::QueueDepth, 4);
> > >
> > > You can set this to 1 or 2 (again, see below).
> > >
> > > > +
> > > >       /* Initialise the supported controls. */
> > > >       ControlInfoMap::Map ctrls;
> > > >
> > > > diff --git a/src/libcamera/pipeline/vimc/vimc.cpp
> > > b/src/libcamera/pipeline/vimc/vimc.cpp
> > > > index 8f5f4ba30953..605b3fe89152 100644
> > > > --- a/src/libcamera/pipeline/vimc/vimc.cpp
> > > > +++ b/src/libcamera/pipeline/vimc/vimc.cpp
> > > > @@ -20,6 +20,7 @@
> > > >  #include <libcamera/formats.h>
> > > >  #include <libcamera/ipa/ipa_interface.h>
> > > >  #include <libcamera/ipa/ipa_module_info.h>
> > > > +#include <libcamera/property_ids.h>
> > > >  #include <libcamera/request.h>
> > > >  #include <libcamera/stream.h>
> > > >
> > > > @@ -516,6 +517,8 @@ int VimcCameraData::init()
> > > >       /* Initialize the camera properties. */
> > > >       properties_ = sensor_->properties();
> > > >
> > > > +     properties_.set(properties::QueueDepth, 4);
> > >
> > > And here, 2 should be enough.
> > >
> > > > +
> > > >       return 0;
> > > >  }
> > > >
> > > > diff --git a/src/libcamera/property_ids.yaml
> > > b/src/libcamera/property_ids.yaml
> > > > index 104e9aaf4fa3..0b7d1271a26b 100644
> > > > --- a/src/libcamera/property_ids.yaml
> > > > +++ b/src/libcamera/property_ids.yaml
> > > > @@ -678,6 +678,11 @@ controls:
> > > >          \todo Turn this property into a "maximum control value" for
> the
> > > >          ScalerCrop control once "dynamic" controls have been
> > > implemented.
> > > >
> > > > +  - QueueDepth:
> > > > +      type: int32_t
> > > > +      description: |
> > > > +        Minimum amount of requests needed in the camera pipeline.
> > >
> > > I think this needs to be better defined.
> > >
> > > First of all, the name QueueDepth doesn't really relate to concepts
> that
> > > are explicitly defined in libcamera. We should thus either define those
> > > concepts, or, if we want to define the property in a self-contained
> way,
> > > change the name to match the description of the property. This could
> for
> > > instance become MinNumRequests.
> > >
> > > Then, we should also expand the definition of the control. This
> requires
> > > a bit of design work. What do we want to convey here ? Let's start the
> > > brainstorming session, which means you can challenge anything I'll
> > > propose here, and present any new idea.
> > >
> > > There are multiple reasons why we need a minimum number of requests
> > > queued:
> > >
> > > - Fulfilling driver requirements, which are related to hardware
> > >   requirements.
> > >
> > >   It is common for video capture hardware to require multiple buffers
> to
> > >   be queued to the hardware. A very common situation (mostly seen in
> > >   low-end hardware) is to have two buffer address registers for the
> > >   capture DMA engine, and ping-pong between the two of them. The
> > >   hardware in that case can't deal with buffer underruns, so the driver
> > >   needs to supply two buffers before capture can be started. The driver
> > >   is also limited by the hardware when it comes to buffer completion.
> > >   When a frame capture complete, a new buffer is needed to replace the
> > >   completed one. Drivers usually hold on the completed buffer if no new
> > >   buffer is available, so that the DMA engine will always have two
> > >   buffers available. This means that queueing two buffers would start
> > >   the capture, and a third buffer is needed for the driver to send
> > >   completed buffers back to userspace. A similar situation occurs when
> > >   the hardware uses shadow registers instead of an explicit ping-pong
> > >   mechanism.
> > >
> > >   There are several things that drivers can (and should) do to minimize
> > >   the impact on userspace. One of them would be to allocate a scratch
> > >   buffer to be used when userspace doesn't provide buffers fast enough.
> > >   This is costly in terms of memory. Userspace is in a better position
> > >   to handle scratch buffers, and skip them if it can guarantee that
> > >   there will be no buffer queue underrun. However, when the device is
> > >   behind an IOMMU, a single physical page can be allocated and mapped
> to
> > >   the device as a large DMA area that spans a full frame, which makes
> > >   scratch buffers cheap if handled by the kernel.
> > >
> > >   Another contingency measure is possible in the ping-pong case. If the
> > >   hardware requires buffers A and B to be available, capture can start
> > >   immediately when two buffers (let's call them 1 and 2) are queued.
> > >   When capture to A completes, the driver needs to reprogram the
> address
> > >   of buffer A. If a third buffer is available in the queue at that
> time,
> > >   it will use it.  Otherwise, it could reuse buffer 2 to program
> address
> > >   A. This means that, when capture to B completes, that frame will be
> > >   dropped, as the buffer will be reused immediately for A. This makes
> it
> > >   possible to dequeue buffers with only two buffers queued instead of
> > >   three. Using less than three buffers would however still result in
> > >   frames being dropped.
> > >
> > >   Note that these constraints should be applicable to inline hardware
> > >   only. For offline ISPs (memory-to-memory), the hardware shouldn't
> have
> > >   such constraints, and should be fine with starting processing as soon
> > >   as one buffer is available, and to stop processing until a new buffer
> > >   is available in case of a buffer queue underrun. That's at least the
> > >   case for sane hardware, and we may not be able to completely exclude
> > >   insanity on the hardware side.
> > >
> > > - Peeking ahead in the request parameters.
> > >
> > >   It can take time for some parameters to be applied. For instance,
> it's
> > >   quite typical for the camera sensor's exposure time and gain to
> > >   require 2 and 1 frames to be applied. This means that, to guarantee
> > >   proper function of per-frame control, we need to have parameters
> > >   available in advance. When running with auto-exposure enabled this
> may
> > >   be less of a problem for the exposure time and gain, but if the
> > >   application wants to control those parameters manually, we need
> enough
> > >   requests in the queue.
> > >
> > >   Generally speaking, a camera pipeline can be composed of multiple
> > >   processing stages. A typical case has a camera sensor connected to an
> > >   inline ISP (or just a CSI-2 receiver) to capture raw images, followed
> > >   by an offline (memory-to-memory) ISP to process raw frames and output
> > >   YUV. The result can then be encoded to JPEG by a memory-to-memory
> > >   encoder (this is only relevant for the Android camera HAL
> > >   implementation, the libcamera core doesn't deal with JPEG encoding).
> > >   While frame N is being encoded to JPEG, frame N+1 is being processed
> > >   by the offline ISP, frame N+2 is being read out from the sensor (and
> > >   possibly processed by the inline ISP), and frame N+3 may be exposed.
> > >   Even if the hardware doesn't require more than one buffer to be
> queued
> > >   to function, if request N contains parameters for the camera sensor,
> > >   for the offline ISP, and for the JPEG encoder, those parameters need
> > >   to be available to libcamera three frames ealier to be applied at the
> > >   right time.
> > >
> > >   Note that some processing steps could in theory take more than the
> > >   time of one frame to complete, if they're pipelined at the hardware
> > >   level, or if they can be run in parallel for multiple frames (for
> > >   instance we could have two JPEG encoders that take the time of one
> > >   frame and a half to encode a frame). I'm not sure if this would
> impact
> > >   what we need to expose to applications.
> > >
> > > The question is how to translate this to requirements exposed to
> > > applications. Going back to the IPU3 and RPi pipeline handlers, both
> > > have the same hardware architecture, with a CSI-2 receiver capturing to
> > > memory, and an offline ISP. The hardware and drivers will operate with
> a
> > > single buffer, but the inline part will drop frames if we don't have at
> > > least two buffers for raw frames (which may be internal buffers if the
> > > application only captures YUV streams, or application-provided buffers
> > > if the application also captures a RAW stream). A minimum of 1 request
> > > will be able to capture frames in suboptimal conditions. 2 requests
> > > would be needed to avoid dropping frames, and 3 requests to ensure
> > > manual exposure time can be applied early enough (a 4th request would
> > > then be needed to handle the JPEG encoding, but as that's for the
> > > Android camera HAL implementation only, the libcamera core doesn't need
> > > to consider it). Note that, if IPA algorithms are available, we could
> > > possibly get away with less requests as the IPA could compute
> parameters
> > > earlier, but we'd be screwed if a request disables algorithms, which we
> > > can't really predict.
> > >
> > > Let's also note that this doesn't mean that we need to have 3 or 4
> > > buffers available, the buffers are only needed on the output side of
> the
> > > pipeline, later than the parameters, but we currently have no way to
> > > decouple buffers and controls.
> > >
> > > What should we report in such a case ? I think we need to provide
> enough
> > > information for applications to implement the optimal case (no frame
> > > dropped, controls applied on the right frame, ...). We probably also
> > > want to let applications know what the bare minimum is in order to
> > > capture frames in suboptimal conditions. For instance, an application
> > > may want to capture frames with a very low frame rate, or even capture
> a
> > > single snapshot, and it would probably be overkill to require queuing
> > > more than one request in that case. That leaves open the question of
> how
> > > to deal with drivers that need more than one buffer to operate (the
> > > non-IOMMU case above). Do we want to support them or can we draw the
> > > line there (I think we should support them, unfortunately) ? Do we want
> > > to allocate scratch buffers in the pipeline handler to make this
> > > completely transparent to the application, which would result in a
> waste
> > > of memory if the application provides us with enough buffers at all
> > > times (and would also give us "interesting" problems to solve, such as
> > > deciding when to queue a scratch buffer at start time) ? In many cases
> > > the IPA needs to run for a few frames before the algorithms stabilize,
> > > which would require multiple requests to be queued anyway (a single
> > > snapshot use case could be implemented with a helper on top of
> > > libcamera), but if an application disables all algorithms and provides
> > > manual values for all controls, it could be feasible to capture a
> proper
> > > snapshot with a single buffer and a single request.
> > >
> > > Naush, David, your input would be welcome here, both on the explanation
> > > of the issue (are my understanding and explanation correct ?), and on
> > > the questions that follow.
> > >
> >
> > I think Laurent's explanation provides a comprehensive summary of what
> > this control might need to encompass. To provide a bit more context, for
> > Raspberry Pi, our CSI2 device driver (Unicam) does use a scratch buffer
> > if a framebuffer is not provided by the application. This scratch buffer
> is
> > 16k (single page) in size, as our hardware allows us to truncate or run
> in
> > circular buffer mode. I intentionally left allocation and control of the
> > scratch
> > buffer within the kernel driver as buffer handling is driven by the
> device
> > interrupt handler for timing guarantees. Given that Unicam writes out to
> > memory, and our ISP is purely a memory to memory device, I have suggested
> > our minimum queue depth be set to 1 for guaranteed operation.
> >
> > Having said that, there is an interesting point raised by Laurent about
> > manual
> > exposure and gain controls. I believe we have always said that these
> > controls don't need to be consumed by the buffer returned in the Request
> > that set them (i.e. the gain/exposure updates can return N frames later),
> > is that still correct? Of course, setting manual exposure/gain before
> > starting
> > streaming would (should!) guarantee the buffer returned in the Request
> > will have the correct values set.
> >
> > In my opinion, I don't see much value in tying this QueueDepth property
> with
> > sensor delays - which can and will be different per sensor. If an
> > application
> > has requested manual exposure/gain values *while in a streaming state*
> > it might just have to scan the metadata on each returned framebuffer
> until
> > it has the frame with the values it requested. This can be easily done in
> > it's request/return loop without needing to know the frame delays, so no
> > need to adjust QueueDepth for this.
>
> This seems like it needs more discussion and those of you already involved
> in
> libcamera understand the implications better than me :).
>
> My rationale is: With this series I added the flexibility to choose the
> number
> of buffers to be allocated, but I wanted the code that doesn't care about
> that
> to keep working, by allocating an amount of buffers that just works (like
> the
> current lc-compliance tests, the internal tests, cam and qcam).
>
> So, based on your comments, how about I rename the property to
> RecommendedNumRequests ? This would be the number of requests recommended
> in
> order for everything to work well (both for the hardware pipeline and
> manually
> setting controls for frames). It seems more discussion needs to happen in
> order
> to determine if sensor delays should be accounted for or not, but in case
> they
> are, then another property could be created as MinNumRequests, for the real
> minimum number needed, although without guarantee that controls are set in
> time
> and no frames are dropped.
>

This may be possible, but probably not labelled as RecommendedNumRequests
and MinNumRequests. If RecommendedNumRequests were to account for sensor
delays, the property value would only really apply if the application were to
set a manual exposure or gain (or any other delayed control). So "Recommended"
here would mean recommended only if/when you provide some manual controls :-)


>
> Regarding the sensor delay, I wonder about the possibility of the pipeline
> handlers finding out the sensor delay based on the Model property
> (possibly in a
> "database" that can be reused by all pipeline handlers). That way, during
> setup
> the pipeline handler would export its MinNumRequest property based on a
> hardcoded value since it knows the hardware requirements, and then
> MinNumRequests + SensorDelay (or max() them? Not sure) to get
> RecommendedNumRequests and export it.
>

Raspberry Pi uses our CamHelper database for exactly this. There is also the
libcamera SensorDatabase, which is building up functionality and would
eventually replace CamHelper; that is where we would include the appropriate
delay values.
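
Purely as an illustration of the kind of data involved (the numbers below are
made up; the real values live in CamHelper today), such a database boils down
to something like:

  /* Per-sensor control delays, keyed by the Model property. Illustrative only. */
  struct SensorDelays {
          unsigned int exposureDelay;   /* frames before a new exposure time lands */
          unsigned int gainDelay;       /* frames before a new analogue gain lands */
  };

  static const std::map<std::string, SensorDelays> delayDatabase = {
          { "imx219", { 2, 1 } },       /* example values only */
          { "ov5647", { 2, 2 } },       /* example values only */
  };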

Regards,
Naush


> Thanks,
> Nícolas
>
> >
> > Regards,
> > Naush
> >
> >
> >
> > >
> > > Nícolas, don't let this scare you, you don't have to solve this issue
> as
> > > part of this patch series :-) We can merge it as a first step, and
> > > continue working on top. I would however like this patch to define the
> > > QueueDepth property a bit better. Based on the above, you can decide
> > > what you want to represent with QueueDepth, and document and name it
> > > accordingly. If you're not sure, we can of course discuss it.
> > >
> > > > +
> > > >    #
> > >
> ----------------------------------------------------------------------------
> > > >    # Draft properties section
> > > >
> > >
> > > --
> > > Regards,
> > >
> > > Laurent Pinchart
> > >
>
>
>
David Plowman April 27, 2021, 7:27 a.m. UTC | #6
Hi everyone

Just a few more random thoughts...

On Mon, 26 Apr 2021 at 19:35, Naushir Patuck <naush@raspberrypi.com> wrote:
>
> Hi Nicolas,
>
> On Mon, 26 Apr 2021 at 18:13, Nícolas F. R. A. Prado <nfraprado@collabora.com> wrote:
>>
>> Hi,
>>
>> thank you both for the feedback.
>>
>> Em 2021-04-26 07:38, Naushir Patuck escreveu:
>>
>> > Hi everyone,
>> >
>> > On Mon, 26 Apr 2021 at 03:40, Laurent Pinchart <
>> > laurent.pinchart@ideasonboard.com> wrote:
>> >
>> > > Hi Nícolas,
>> > >
>> > > Thank you for the patch.
>> > >
>> > > On Wed, Apr 21, 2021 at 01:51:36PM -0300, Nícolas F. R. A. Prado wrote:
>> > > > The QueueDepth property reports the minimum amount of requests needed in
>> > > > the camera pipeline.
>> > > >
>> > > > Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
>> > > > ---
>> > > >  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
>> > > >  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
>> > > >  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
>> > > >  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
>> > > >  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
>> > > >  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
>> > > >  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
>> > > >  src/libcamera/property_ids.yaml                    | 5 +++++
>> > > >  8 files changed, 26 insertions(+), 4 deletions(-)
>> > > >
>> > > > diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp
>> > > b/src/libcamera/pipeline/ipu3/ipu3.cpp
>> > > > index 51446fcf5bc1..6067db2f37a3 100644
>> > > > --- a/src/libcamera/pipeline/ipu3/ipu3.cpp
>> > > > +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
>> > > > @@ -1049,6 +1049,10 @@ int PipelineHandlerIPU3::registerCameras()
>> > > >               /* Initialize the camera properties. */
>> > > >               data->properties_ = cio2->sensor()->properties();
>> > > >
>> > > > +             /* TODO This can be changed to CIO2 after configuration,
>> > > but
>> > > > +              * both are 4 currently */
>> > >
>> > >                 /*
>> > >                  * \todo This can be changed to CIO2 after configuration,
>> > > but
>> > >                  * both are 4 currently
>> > >                  */
>> > >
>> > > The \todo is doxygen style and is used through the code base. The
>> > > placement of /* */ is our standard coding style. Same below.
>> > >
>> > > > +
>> > > > +
>> > > > +             data->properties_.set(properties::QueueDepth, 4);
>> > >
>> > > I think you can already lower the value, to 1 or 2 depending on the
>> > > definition of the property (see below).
>> > >
>> > > > +
>> > > >               ret = initControls(data.get());
>> > > >               if (ret)
>> > > >                       continue;
>> > > > diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
>> > > b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
>> > > > index 2a917455500f..8d1ade3a4352 100644
>> > > > --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
>> > > > +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
>> > > > @@ -1035,6 +1035,10 @@ bool PipelineHandlerRPi::match(DeviceEnumerator
>> > > *enumerator)
>> > > >       /* Initialize the camera properties. */
>> > > >       data->properties_ = data->sensor_->properties();
>> > > >
>> > > > +     /* TODO Can be 1, 2 or 4 depending on configuration, for now use
>> > > the max
>> > > > +      * which is 4 */
>> > > > +     data->properties_.set(properties::QueueDepth, 4);
>> > >
>> > > Same here, Naush commented that this should be 1, but depending on how
>> > > we define the control, 2 could be a better value.
>> > >
>> > > > +
>> > > >       /*
>> > > >        * Set a default value for the ScalerCropMaximum property to show
>> > > >        * that we support its use, however, initialise it to zero because
>> > > > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
>> > > b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
>> > > > index 549f4a4e61a8..7d876e9387d7 100644
>> > > > --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
>> > > > +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
>> > > > @@ -21,6 +21,7 @@
>> > > >  #include <libcamera/ipa/core_ipa_interface.h>
>> > > >  #include <libcamera/ipa/rkisp1_ipa_interface.h>
>> > > >  #include <libcamera/ipa/rkisp1_ipa_proxy.h>
>> > > > +#include <libcamera/property_ids.h>
>> > > >  #include <libcamera/request.h>
>> > > >  #include <libcamera/stream.h>
>> > > >
>> > > > @@ -940,6 +941,7 @@ int PipelineHandlerRkISP1::createCamera(MediaEntity
>> > > *sensor)
>> > > >
>> > > >       /* Initialize the camera properties. */
>> > > >       data->properties_ = data->sensor_->properties();
>> > > > +     data->properties_.set(properties::QueueDepth, RKISP1_BUFFER_COUNT);
>> > >
>> > > Here 3 would be a better value, to match the driver. However, I wonder
>> > > if the hardware really needs 3 buffers. Dafna, Helen, do you have
>> > > information about this ? A quick glance at the driver makes me think we
>> > > could lower the number of buffers to 2. Lowering it to 1 may be nice,
>> > > but probably more tricky.
>> > >
>> > > >
>> > > >       /*
>> > > >        * \todo Read dealy values from the sensor itself or from a
>> > > > diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
>> > > b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
>> > > > index 3b3e37d258d0..7540dd41ad84 100644
>> > > > --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
>> > > > +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
>> > > > @@ -26,6 +26,8 @@ class V4L2Subdevice;
>> > > >  struct StreamConfiguration;
>> > > >  struct V4L2SubdeviceFormat;
>> > > >
>> > > > +static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
>> > > > +
>> > > >  class RkISP1Path
>> > > >  {
>> > > >  public:
>> > > > @@ -56,8 +58,6 @@ public:
>> > > >       Signal<FrameBuffer *> &bufferReady() { return video_->bufferReady;
>> > > }
>> > > >
>> > > >  private:
>> > > > -     static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
>> > > > -
>> > > >       const char *name_;
>> > > >       bool running_;
>> > > >
>> > > > diff --git a/src/libcamera/pipeline/simple/simple.cpp
>> > > b/src/libcamera/pipeline/simple/simple.cpp
>> > > > index f6095d38e97a..6ee24f2f14e8 100644
>> > > > --- a/src/libcamera/pipeline/simple/simple.cpp
>> > > > +++ b/src/libcamera/pipeline/simple/simple.cpp
>> > > > @@ -22,6 +22,7 @@
>> > > >  #include <linux/media-bus-format.h>
>> > > >
>> > > >  #include <libcamera/camera.h>
>> > > > +#include <libcamera/property_ids.h>
>> > > >  #include <libcamera/request.h>
>> > > >  #include <libcamera/stream.h>
>> > > >
>> > > > @@ -141,6 +142,8 @@ static const SimplePipelineInfo supportedDevices[] =
>> > > {
>> > > >
>> > > >  } /* namespace */
>> > > >
>> > > > +static constexpr unsigned int kNumInternalBuffers = 3;
>> > > > +
>> > > >  class SimpleCameraData : public CameraData
>> > > >  {
>> > > >  public:
>> > > > @@ -238,8 +241,6 @@ protected:
>> > > >       int queueRequestDevice(Camera *camera, Request *request) override;
>> > > >
>> > > >  private:
>> > > > -     static constexpr unsigned int kNumInternalBuffers = 3;
>> > > > -
>> > >
>> > > Is this change needed ?
>> > >
>> > > >       SimpleCameraData *cameraData(const Camera *camera)
>> > > >       {
>> > > >               return static_cast<SimpleCameraData *>(
>> > > > @@ -424,6 +425,7 @@ int SimpleCameraData::init()
>> > > >       }
>> > > >
>> > > >       properties_ = sensor_->properties();
>> > > > +     properties_.set(properties::QueueDepth, kNumInternalBuffers);
>> > > >
>> > > >       return 0;
>> > > >  }
>> > > > diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
>> > > b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
>> > > > index b6c6ade5ebaf..591f46b60d23 100644
>> > > > --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
>> > > > +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
>> > > > @@ -525,6 +525,8 @@ int UVCCameraData::init(MediaDevice *media)
>> > > >       properties_.set(properties::PixelArraySize, resolution);
>> > > >       properties_.set(properties::PixelArrayActiveAreas, {
>> > > Rectangle(resolution) });
>> > > >
>> > > > +     properties_.set(properties::QueueDepth, 4);
>> > >
>> > > You can set this to 1 or 2 (again, see below).
>> > >
>> > > > +
>> > > >       /* Initialise the supported controls. */
>> > > >       ControlInfoMap::Map ctrls;
>> > > >
>> > > > diff --git a/src/libcamera/pipeline/vimc/vimc.cpp
>> > > b/src/libcamera/pipeline/vimc/vimc.cpp
>> > > > index 8f5f4ba30953..605b3fe89152 100644
>> > > > --- a/src/libcamera/pipeline/vimc/vimc.cpp
>> > > > +++ b/src/libcamera/pipeline/vimc/vimc.cpp
>> > > > @@ -20,6 +20,7 @@
>> > > >  #include <libcamera/formats.h>
>> > > >  #include <libcamera/ipa/ipa_interface.h>
>> > > >  #include <libcamera/ipa/ipa_module_info.h>
>> > > > +#include <libcamera/property_ids.h>
>> > > >  #include <libcamera/request.h>
>> > > >  #include <libcamera/stream.h>
>> > > >
>> > > > @@ -516,6 +517,8 @@ int VimcCameraData::init()
>> > > >       /* Initialize the camera properties. */
>> > > >       properties_ = sensor_->properties();
>> > > >
>> > > > +     properties_.set(properties::QueueDepth, 4);
>> > >
>> > > And here, 2 should be enough.
>> > >
>> > > > +
>> > > >       return 0;
>> > > >  }
>> > > >
>> > > > diff --git a/src/libcamera/property_ids.yaml
>> > > b/src/libcamera/property_ids.yaml
>> > > > index 104e9aaf4fa3..0b7d1271a26b 100644
>> > > > --- a/src/libcamera/property_ids.yaml
>> > > > +++ b/src/libcamera/property_ids.yaml
>> > > > @@ -678,6 +678,11 @@ controls:
>> > > >          \todo Turn this property into a "maximum control value" for the
>> > > >          ScalerCrop control once "dynamic" controls have been
>> > > implemented.
>> > > >
>> > > > +  - QueueDepth:
>> > > > +      type: int32_t
>> > > > +      description: |
>> > > > +        Minimum amount of requests needed in the camera pipeline.
>> > >
>> > > I think this needs to be better defined.
>> > >
>> > > First of all, the name QueueDepth doesn't really relate to concepts that
>> > > are explicitly defined in libcamera. We should thus either define those
>> > > concepts, or, if we want to define the property in a self-contained way,
>> > > change the name to match the description of the property. This could for
>> > > instance become MinNumRequests.

I think having some kind of "MinNumRequests" is reasonable.
Applications might need to know it in order to make the pipeline handler
function correctly. Anyone have a nicer name for it? :)

>> > >
>> > > Then, we should also expand the definition of the control. This requires
>> > > a bit of design work. What do we want to convey here ? Let's start the
>> > > brainstorming session, which means you can challenge anything I'll
>> > > propose here, and present any new idea.
>> > >
>> > > There are multiple reasons why we need a minimum number of requests
>> > > queued:
>> > >
>> > > - Fulfilling driver requirements, which are related to hardware
>> > >   requirements.
>> > >
>> > >   It is common for video capture hardware to require multiple buffers to
>> > >   be queued to the hardware. A very common situation (mostly seen in
>> > >   low-end hardware) is to have two buffer address registers for the
>> > >   capture DMA engine, and ping-pong between the two of them. The
>> > >   hardware in that case can't deal with buffer underruns, so the driver
>> > >   needs to supply two buffers before capture can be started. The driver
>> > >   is also limited by the hardware when it comes to buffer completion.
>> > >   When a frame capture complete, a new buffer is needed to replace the
>> > >   completed one. Drivers usually hold on the completed buffer if no new
>> > >   buffer is available, so that the DMA engine will always have two
>> > >   buffers available. This means that queueing two buffers would start
>> > >   the capture, and a third buffer is needed for the driver to send
>> > >   completed buffers back to userspace. A similar situation occurs when
>> > >   the hardware uses shadow registers instead of an explicit ping-pong
>> > >   mechanism.
>> > >
>> > >   There are several things that drivers can (and should) do to minimize
>> > >   the impact on userspace. One of them would be to allocate a scratch
>> > >   buffer to be used when userspace doesn't provide buffers fast enough.
>> > >   This is costly in terms of memory. Userspace is in a better position
>> > >   to handle scratch buffers, and skip them if it can guarantee that
>> > >   there will be no buffer queue underrun. However, when the device is
>> > >   behind an IOMMU, a single physical page can be allocated and mapped to
>> > >   the device as a large DMA area that spans a full frame, which makes
>> > >   scratch buffers cheap if handled by the kernel.
>> > >
>> > >   Another contingency measure is possible in the ping-pong case. If the
>> > >   hardware requires buffers A and B to be available, capture can start
>> > >   immediately when two buffers (let's call them 1 and 2) are queued.
>> > >   When capture to A completes, the driver needs to reprogram the address
>> > >   of buffer A. If a third buffer is available in the queue at that time,
>> > >   it will use it.  Otherwise, it could reuse buffer 2 to program address
>> > >   A. This means that, when capture to B completes, that frame will be
>> > >   dropped, as the buffer will be reused immediately for A. This makes it
>> > >   possible to dequeue buffers with only two buffers queued instead of
>> > >   three. Using less than three buffers would however still result in
>> > >   frames being dropped.
>> > >
>> > >   Note that these constraints should be applicable to inline hardware
>> > >   only. For offline ISPs (memory-to-memory), the hardware shouldn't have
>> > >   such constraints, and should be fine with starting processing as soon
>> > >   as one buffer is available, and to stop processing until a new buffer
>> > >   is available in case of a buffer queue underrun. That's at least the
>> > >   case for sane hardware, and we may not be able to completely exclude
>> > >   insanity on the hardware side.

Beyond the "minimum number of requests" I think questions about how
many buffers/requests are actually best left to applications. Talking
about "optimal" or "recommended" numbers of buffers is a bit dangerous
in my view, as applications should know what they need.

For example, if I'm stopping and then restarting the camera for a
single stills capture then just 1 buffer is perfectly optimal.

If you want to record video you might need 3 or (allowing for the
encoder on the end) 4 buffers. But if you're sharing that buffer with
the display system then you probably also have another buffer on
screen, and another queued up waiting to go on-screen at the next
display vsync. So now we're up to at least 6 buffers. That's not
counting my TensorFlowLite image analysis that's happening on another
buffer nor my OpenCV-based custom image enhancements...
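
In other words, the arithmetic is trivial and entirely use-case specific.
Roughly (numbers taken from the example above, purely illustrative, and
minNumRequests standing for whatever minimum property we end up defining):

  unsigned int capture = 4;   /* sensor / ISP / encoder stages in flight */
  unsigned int display = 2;   /* one on screen, one queued for the next vsync */
  unsigned int analysis = 2;  /* TensorFlowLite + OpenCV stages */

  /* The pipeline handler's minimum is a floor, not a target. */
  unsigned int total = std::max(capture + display + analysis, minNumRequests);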

We possibly need to document some guidance here, but I think we can
trust the developers to understand this kind of thing. It needs to be
easy to say how many buffers you need, and it would also be good to
have readily available statistics on how many frames we're dropping,
particularly in the kernel (perhaps this is so already?).

>> > >
>> > > - Peeking ahead in the request parameters.
>> > >
>> > >   It can take time for some parameters to be applied. For instance, it's
>> > >   quite typical for the camera sensor's exposure time and gain to
>> > >   require 2 and 1 frames to be applied. This means that, to guarantee
>> > >   proper function of per-frame control, we need to have parameters
>> > >   available in advance. When running with auto-exposure enabled this may
>> > >   be less of a problem for the exposure time and gain, but if the
>> > >   application wants to control those parameters manually, we need enough
>> > >   requests in the queue.
>> > >
>> > >   Generally speaking, a camera pipeline can be composed of multiple
>> > >   processing stages. A typical case has a camera sensor connected to an
>> > >   inline ISP (or just a CSI-2 receiver) to capture raw images, followed
>> > >   by an offline (memory-to-memory) ISP to process raw frames and output
>> > >   YUV. The result can then be encoded to JPEG by a memory-to-memory
>> > >   encoder (this is only relevant for the Android camera HAL
>> > >   implementation, the libcamera core doesn't deal with JPEG encoding).
>> > >   While frame N is being encoded to JPEG, frame N+1 is being processed
>> > >   by the offline ISP, frame N+2 is being read out from the sensor (and
>> > >   possibly processed by the inline ISP), and frame N+3 may be exposed.
>> > >   Even if the hardware doesn't require more than one buffer to be queued
>> > >   to function, if request N contains parameters for the camera sensor,
>> > >   for the offline ISP, and for the JPEG encoder, those parameters need
>> > >   to be available to libcamera three frames ealier to be applied at the
>> > >   right time.
>> > >
>> > >   Note that some processing steps could in theory take more than the
>> > >   time of one frame to complete, if they're pipelined at the hardware
>> > >   level, or if they can be run in parallel for multiple frames (for
>> > >   instance we could have two JPEG encoders that take the time of one
>> > >   frame and a half to encode a frame). I'm not sure if this would impact
>> > >   what we need to expose to applications.
>> > >
>> > > The question is how to translate this to requirements exposed to
>> > > applications. Going back to the IPU3 and RPi pipeline handlers, both
>> > > have the same hardware architecture, with a CSI-2 receiver capturing to
>> > > memory, and an offline ISP. The hardware and drivers will operate with a
>> > > single buffer, but the inline part will drop frames if we don't have at
>> > > least two buffers for raw frames (which may be internal buffers if the
>> > > application only captures YUV streams, or application-provided buffers
>> > > if the application also captures a RAW stream). A minimum of 1 request
>> > > will be able to capture frames in suboptimal conditions. 2 requests
>> > > would be needed to avoid dropping frames, and 3 requests to ensure
>> > > manual exposure time can be applied early enough (a 4th request would
>> > > then be needed to handle the JPEG encoding, but as that's for the
>> > > Android camera HAL implementation only, the libcamera core doesn't need
>> > > to consider it). Note that, if IPA algorithms are available, we could
>> > > possibly get away with less requests as the IPA could compute parameters
>> > > earlier, but we'd be screwed if a request disables algorithms, which we
>> > > can't really predict.
>> > >
>> > > Let's also note that this doesn't mean that we need to have 3 or 4
>> > > buffers available, the buffers are only needed on the output side of the
>> > > pipeline, later than the parameters, but we currently have no way to
>> > > decouple buffers and controls.

The question of buffers and controls is a difficult one. Are we saying
that we expect the frames in a completed request to have applied the
controls that were passed when the request was made, even for analogue
gain and exposure? I know this was originally the plan, though it's
not clear to me that this is what happens currently...

I worry that this behaviour is a little awkward for applications.
Would it mean that applications must query how many frame delays there
are for this sensor, and create a corresponding number of buffers and
requests? What happens if an application allocates too few (for example,
because memory is tight): do frames get dropped? That seems quite
unhelpful. And what if an application needs more buffers, as per my
earlier example? Are all controls now delayed by being at the back of
this long queue?

I can certainly see why this sort of behaviour might be desirable, not
least because it mimics the V4L2 request API. My own view has always
been more towards decoupling, because sensors are such nasty
asynchronous things (and this approach is not without problems too).
But these are quite big questions!
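
(For reference, the decoupled alternative Naush describes above, where the
application just watches the returned metadata rather than having to know the
delays, would be roughly the following; requestedExposure stands for whatever
manual value the application set:)

  /* Sketch of a request-completed handler scanning per-frame metadata. */
  void requestCompleted(Request *request)
  {
          const ControlList &metadata = request->metadata();

          if (metadata.contains(controls::ExposureTime) &&
              metadata.get(controls::ExposureTime) == requestedExposure) {
                  /* First frame on which our manual exposure actually landed. */
          }

          /* ... reuse the buffers and re-queue the request as usual ... */
  }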

Best regards
David

>> > >
>> > > What should we report in such a case ? I think we need to provide enough
>> > > information for applications to implement the optimal case (no frame
>> > > dropped, controls applied on the right frame, ...). We probably also
>> > > want to let applications know what the bare minimum is in order to
>> > > capture frames in suboptimal conditions. For instance, an application
>> > > may want to capture frames with a very low frame rate, or even capture a
>> > > single snapshot, and it would probably be overkill to require queuing
>> > > more than one request in that case. That leaves open the question of how
>> > > to deal with drivers that need more than one buffer to operate (the
>> > > non-IOMMU case above). Do we want to support them or can we draw the
>> > > line there (I think we should support them, unfortunately) ? Do we want
>> > > to allocate scratch buffers in the pipeline handler to make this
>> > > completely transparent to the application, which would result in a waste
>> > > of memory if the application provides us with enough buffers at all
>> > > times (and would also give us "interesting" problems to solve, such as
>> > > deciding when to queue a scratch buffer at start time) ? In many cases
>> > > the IPA needs to run for a few frames before the algorithms stabilize,
>> > > which would require multiple requests to be queued anyway (a single
>> > > snapshot use case could be implemented with a helper on top of
>> > > libcamera), but if an application disables all algorithms and provides
>> > > manual values for all controls, it could be feasible to capture a proper
>> > > snapshot with a single buffer and a single request.
>> > >
>> > > Naush, David, your input would be welcome here, both on the explanation
>> > > of the issue (are my understanding and explanation correct ?), and on
>> > > the questions that follow.
>> > >
>> >
>> > I think Laurent's explanation provides a comprehensive summary of what
>> > this control might need to encompass. To provide a bit more context, for
>> > Raspberry Pi, our CSI2 device driver (Unicam) does use a scratch buffer
>> > if a framebuffer is not provided by the application. This scratch buffer is
>> > 16k (single page) in size, as our hardware allows us to truncate or run in
>> > circular buffer mode. I intentionally left allocation and control of the
>> > scratch
>> > buffer within the kernel driver as buffer handling is driven by the device
>> > interrupt handler for timing guarantees. Given that Unicam writes out to
>> > memory, and our ISP is purely a memory to memory device, I have suggested
>> > our minimum queue depth be set to 1 for guaranteed operation.
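
(If that's the route taken, the Raspberry Pi hunk in this patch would presumably shrink to something along these lines; the comment wording is only a suggestion:)

	/*
	 * Unicam writes out to memory and the ISP is memory-to-memory, so a
	 * single request is enough for guaranteed operation.
	 */
	data->properties_.set(properties::QueueDepth, 1);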
>> >
>> > Having said that, there is an interesting point raised by Laurent about
>> > manual
>> > exposure and gain controls. I believe we have always said that these
>> > controls don't need to be consumed by the buffer returned in the Request
>> > that set them (i.e. the gain/exposure updates can return N frames later),
>> > is that still correct? Of course, setting manual exposure/gain before
>> > starting
>> > streaming would (should!) guarantee the buffer returned in the Request
>> > will have the correct values set.
>> >
>> > In my opinion, I don't see much value in tying this QueueDepth property with
>> > sensor delays - which can and will be different per sensor. If an
>> > application
>> > has requested manual exposure/gain values *while in a streaming state*
>> > it might just have to scan the metadata on each returned framebuffer until
>> > it has the frame with the values it requested. This can be easily done in
>> > its request/return loop without needing to know the frame delays, so no
>> > need to adjust QueueDepth for this.
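
For illustration, a minimal sketch of that metadata scan, assuming the pipeline
reports controls::ExposureTime and controls::AnalogueGain in the request
metadata (the exact ControlList accessors differ across libcamera versions, and
the requested values below are placeholders):

#include <cmath>
#include <cstdint>

#include <libcamera/control_ids.h>
#include <libcamera/controls.h>
#include <libcamera/request.h>

using namespace libcamera;

/* Manual values the application set in an earlier Request (assumed context). */
static constexpr int32_t kRequestedExposureUs = 10000;
static constexpr float kRequestedGain = 2.0f;

/*
 * Returns true once a completed request reports the manual values we asked
 * for, i.e. the frame that consumed them has been reached.
 */
static bool manualValuesApplied(Request *request)
{
	const ControlList &metadata = request->metadata();

	if (!metadata.contains(controls::ExposureTime) ||
	    !metadata.contains(controls::AnalogueGain))
		return false;

	return metadata.get(controls::ExposureTime) == kRequestedExposureUs &&
	       std::abs(metadata.get(controls::AnalogueGain) - kRequestedGain) < 0.01f;
}

The application would call something like this from its requestCompleted
handler and keep re-queueing requests until it returns true.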
>>
>> This seems like it needs more discussion and those of you already involved in
>> libcamera understand the implications better than me :).
>>
>> My rationale is: With this series I added the flexibility to choose the number
>> of buffers to be allocated, but I wanted the code that doesn't care about that
>> to keep working, by allocating an amount of buffers that just works (like the
>> current lc-compliance tests, the internal tests, cam and qcam).
>>
>> So, based on your comments, how about I rename the property to
>> RecommendedNumRequests ? This would be the number of requests recommended in
>> order for everything to work well (both for the hardware pipeline and manually
>> setting controls for frames). It seems more discussion needs to happen in order
>> to determine if sensor delays should be accounted for or not, but in case they
>> are, then another property could be created as MinNumRequests, for the real
>> minimum number needed, although without guarantee that controls are set in time
>> and no frames are dropped.
>
>
> This may be possible, but probably not labelled as RecommendedNumRequests
> and MinNumRequest.  If  RecommendedNumRequests were to account for sensor
> delays, the property value would only really apply if the application were to set a
> manual exposure or gain (or any other delayed control).  So "Recommended" here
> would mean recommended only if/when you provide some manual controls  :-)
>
>>
>>
>> Regarding the sensor delay, I wonder about the possibility of the pipeline
>> handlers finding out the sensor delay based on the Model property (possibly in a
>> "database" that can be reused by all pipeline handlers). That way, during setup
>> the pipeline handler would export its MinNumRequests property based on a
>> hardcoded value since it knows the hardware requirements, and then
>> MinNumRequests + SensorDelay (or max() them? Not sure) to get
>> RecommendedNumRequests and export it.
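
Purely as a sketch of that idea (the delay table, helper name and values below
are hypothetical, not existing libcamera API):

#include <algorithm>
#include <map>
#include <string>

/*
 * Hypothetical per-sensor control delay table, keyed on the Model property;
 * the entries and delay values are placeholders.
 */
static const std::map<std::string, unsigned int> kSensorDelays = {
	{ "imx219", 2 },
	{ "ov5647", 2 },
};

static unsigned int recommendedNumRequests(const std::string &model,
					    unsigned int minNumRequests)
{
	auto it = kSensorDelays.find(model);
	unsigned int delay = it != kSensorDelays.end() ? it->second : 0;

	/*
	 * Whether to add the delay or to max() the two values is exactly the
	 * open question above; max() is used here only as an example.
	 */
	return std::max(minNumRequests, delay + 1);
}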
>
>
> Raspberry Pi uses our CamHelper database for exactly this.  There is also the
> libcamera SensorDatabase that is building up functionality and would eventually
> replace CamHelper, where we would include the appropriate delay values.
>
> Regards,
> Naush
>
>>
>> Thanks,
>> Nícolas
>>
>> >
>> > Regards,
>> > Naush
>> >
>> >
>> >
>> > >
>> > > Nícolas, don't let this scare you, you don't have to solve this issue as
>> > > part of this patch series :-) We can merge it as a first step, and
>> > > continue working on top. I would however like this patch to define the
>> > > QueueDepth property a bit better. Based on the above, you can decide
>> > > what you want to represent with QueueDepth, and document and name it
>> > > accordingly. If you're not sure, we can of course discuss it.
>> > >
>> > > > +
>> > > >    #
>> > > ----------------------------------------------------------------------------
>> > > >    # Draft properties section
>> > > >
>> > >
>> > > --
>> > > Regards,
>> > >
>> > > Laurent Pinchart
>> > >
>>
>>
Nícolas F. R. A. Prado May 5, 2021, 8:07 p.m. UTC | #7
Hi everyone,

Em 2021-04-27 04:27, David Plowman escreveu:

> Hi everyone
>
> Just a few more random thoughts...
>
> On Mon, 26 Apr 2021 at 19:35, Naushir Patuck <naush@raspberrypi.com> wrote:
> >
> > Hi Nicolas,
> >
> > On Mon, 26 Apr 2021 at 18:13, Nícolas F. R. A. Prado <nfraprado@collabora.com> wrote:
> >>
> >> Hi,
> >>
> >> thank you both for the feedback.
> >>
> >> Em 2021-04-26 07:38, Naushir Patuck escreveu:
> >>
> >> > Hi everyone,
> >> >
> >> > On Mon, 26 Apr 2021 at 03:40, Laurent Pinchart <
> >> > laurent.pinchart@ideasonboard.com> wrote:
> >> >
> >> > > Hi Nícolas,
> >> > >
> >> > > Thank you for the patch.
> >> > >
> >> > > On Wed, Apr 21, 2021 at 01:51:36PM -0300, Nícolas F. R. A. Prado wrote:
> >> > > > The QueueDepth property reports the minimum amount of requests needed in
> >> > > > the camera pipeline.
> >> > > >
> >> > > > Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> >> > > > ---
> >> > > >  src/libcamera/pipeline/ipu3/ipu3.cpp               | 4 ++++
> >> > > >  src/libcamera/pipeline/raspberrypi/raspberrypi.cpp | 4 ++++
> >> > > >  src/libcamera/pipeline/rkisp1/rkisp1.cpp           | 2 ++
> >> > > >  src/libcamera/pipeline/rkisp1/rkisp1_path.h        | 4 ++--
> >> > > >  src/libcamera/pipeline/simple/simple.cpp           | 6 ++++--
> >> > > >  src/libcamera/pipeline/uvcvideo/uvcvideo.cpp       | 2 ++
> >> > > >  src/libcamera/pipeline/vimc/vimc.cpp               | 3 +++
> >> > > >  src/libcamera/property_ids.yaml                    | 5 +++++
> >> > > >  8 files changed, 26 insertions(+), 4 deletions(-)
> >> > > >

<snip>

> >> > > > diff --git a/src/libcamera/property_ids.yaml
> >> > > b/src/libcamera/property_ids.yaml
> >> > > > index 104e9aaf4fa3..0b7d1271a26b 100644
> >> > > > --- a/src/libcamera/property_ids.yaml
> >> > > > +++ b/src/libcamera/property_ids.yaml
> >> > > > @@ -678,6 +678,11 @@ controls:
> >> > > >          \todo Turn this property into a "maximum control value" for the
> >> > > >          ScalerCrop control once "dynamic" controls have been
> >> > > implemented.
> >> > > >
> >> > > > +  - QueueDepth:
> >> > > > +      type: int32_t
> >> > > > +      description: |
> >> > > > +        Minimum amount of requests needed in the camera pipeline.
> >> > >
> >> > > I think this needs to be better defined.
> >> > >
> >> > > First of all, the name QueueDepth doesn't really relate to concepts that
> >> > > are explicitly defined in libcamera. We should thus either define those
> >> > > concepts, or, if we want to define the property in a self-contained way,
> >> > > change the name to match the description of the property. This could for
> >> > > instance become MinNumRequests.
>
> I think having some kind of "MinNumRequests" is reasonable.
> Applications might need to know in order to make the pipeline handler
> function correctly. Anyone have a nicer name for it? :)
>
> >> > >
> >> > > Then, we should also expand the definition of the control. This requires
> >> > > a bit of design work. What do we want to convey here ? Let's start the
> >> > > brainstorming session, which means you can challenge anything I'll
> >> > > propose here, and present any new idea.
> >> > >
> >> > > There are multiple reasons why we need a minimum number of requests
> >> > > queued:
> >> > >
> >> > > - Fulfilling driver requirements, which are related to hardware
> >> > >   requirements.
> >> > >
> >> > >   It is common for video capture hardware to require multiple buffers to
> >> > >   be queued to the hardware. A very common situation (mostly seen in
> >> > >   low-end hardware) is to have two buffer address registers for the
> >> > >   capture DMA engine, and ping-pong between the two of them. The
> >> > >   hardware in that case can't deal with buffer underruns, so the driver
> >> > >   needs to supply two buffers before capture can be started. The driver
> >> > >   is also limited by the hardware when it comes to buffer completion.
> >> > >   When a frame capture complete, a new buffer is needed to replace the
> >> > >   completed one. Drivers usually hold on the completed buffer if no new
> >> > >   buffer is available, so that the DMA engine will always have two
> >> > >   buffers available. This means that queueing two buffers would start
> >> > >   the capture, and a third buffer is needed for the driver to send
> >> > >   completed buffers back to userspace. A similar situation occurs when
> >> > >   the hardware uses shadow registers instead of an explicit ping-pong
> >> > >   mechanism.
> >> > >
> >> > >   There are several things that drivers can (and should) do to minimize
> >> > >   the impact on userspace. One of them would be to allocate a scratch
> >> > >   buffer to be used when userspace doesn't provide buffers fast enough.
> >> > >   This is costly in terms of memory. Userspace is in a better position
> >> > >   to handle scratch buffers, and skip them if it can guarantee that
> >> > >   there will be no buffer queue underrun. However, when the device is
> >> > >   behind an IOMMU, a single physical page can be allocated and mapped to
> >> > >   the device as a large DMA area that spans a full frame, which makes
> >> > >   scratch buffers cheap if handled by the kernel.
> >> > >
> >> > >   Another contingency measure is possible in the ping-pong case. If the
> >> > >   hardware requires buffers A and B to be available, capture can start
> >> > >   immediately when two buffers (let's call them 1 and 2) are queued.
> >> > >   When capture to A completes, the driver needs to reprogram the address
> >> > >   of buffer A. If a third buffer is available in the queue at that time,
> >> > >   it will use it.  Otherwise, it could reuse buffer 2 to program address
> >> > >   A. This means that, when capture to B completes, that frame will be
> >> > >   dropped, as the buffer will be reused immediately for A. This makes it
> >> > >   possible to dequeue buffers with only two buffers queued instead of
> >> > >   three. Using less than three buffers would however still result in
> >> > >   frames being dropped.
> >> > >
> >> > >   Note that these constraints should be applicable to inline hardware
> >> > >   only. For offline ISPs (memory-to-memory), the hardware shouldn't have
> >> > >   such constraints, and should be fine with starting processing as soon
> >> > >   as one buffer is available, and to stop processing until a new buffer
> >> > >   is available in case of a buffer queue underrun. That's at least the
> >> > >   case for sane hardware, and we may not be able to completely exclude
> >> > >   insanity on the hardware side.
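
As a toy illustration of that ping-pong contingency (not modelled on any
particular driver, and simplified so addresses are only reprogrammed at
completion time):

#include <deque>

struct Buffer;

struct PingPongDma {
	Buffer *slot[2] = {};              /* buffers programmed in hardware */
	std::deque<Buffer *> queue;        /* buffers queued by userspace */

	/*
	 * Called when capture into slot[idx] completes. Returns the buffer to
	 * hand back to userspace, or nullptr if the frame has to be dropped.
	 */
	Buffer *complete(unsigned int idx)
	{
		Buffer *done = slot[idx];

		if (!queue.empty()) {
			/* A third buffer is available, use it. */
			slot[idx] = queue.front();
			queue.pop_front();
		} else {
			/* Underrun: reuse the other slot's buffer. */
			slot[idx] = slot[idx ^ 1];
		}

		/*
		 * If the completed buffer is still (or now again) programmed
		 * in a slot, its frame is dropped; otherwise it can be
		 * returned to userspace.
		 */
		if (done == slot[0] || done == slot[1])
			return nullptr;

		return done;
	}
};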
>
> Beyond the "minimum number of requests" I think questions about how
> many buffers/requests are actually best left to applications. Talking
> about "optimal" or "recommended" numbers of buffers is a bit dangerous
> in my view, as applications should know what they need.
>
> For example, if I'm stopping and then restarting the camera for a
> single stills capture then just 1 buffer is perfectly optimal.
>
> If you want to record video you might need 3 or (allowing for the
> encoder on the end) 4 buffers. But if you're sharing that buffer with
> the display system then you probably also have another buffer on
> screen, and another queued up waiting to go on-screen at the next
> display vsync. So now we're up to at least 6 buffers. That's not
> counting my TensorFlowLite image analysis that's happening on another
> buffer nor my OpenCV-based custom image enhancements...
>
> We possibly need to document some guidance here, but I think we can
> trust the developers to understand this kind of thing. It needs to be
> easy to say how many buffers you need, and it would also be good to
> have readily available statistics on how many frames we're dropping,
> particularly in the kernel (perhaps this is so already?).
>
> >> > >
> >> > > - Peeking ahead in the request parameters.
> >> > >
> >> > >   It can take time for some parameters to be applied. For instance, it's
> >> > >   quite typical for the camera sensor's exposure time and gain to
> >> > >   require 2 and 1 frames to be applied. This means that, to guarantee
> >> > >   proper function of per-frame control, we need to have parameters
> >> > >   available in advance. When running with auto-exposure enabled this may
> >> > >   be less of a problem for the exposure time and gain, but if the
> >> > >   application wants to control those parameters manually, we need enough
> >> > >   requests in the queue.
> >> > >
> >> > >   Generally speaking, a camera pipeline can be composed of multiple
> >> > >   processing stages. A typical case has a camera sensor connected to an
> >> > >   inline ISP (or just a CSI-2 receiver) to capture raw images, followed
> >> > >   by an offline (memory-to-memory) ISP to process raw frames and output
> >> > >   YUV. The result can then be encoded to JPEG by a memory-to-memory
> >> > >   encoder (this is only relevant for the Android camera HAL
> >> > >   implementation, the libcamera core doesn't deal with JPEG encoding).
> >> > >   While frame N is being encoded to JPEG, frame N+1 is being processed
> >> > >   by the offline ISP, frame N+2 is being read out from the sensor (and
> >> > >   possibly processed by the inline ISP), and frame N+3 may be exposed.
> >> > >   Even if the hardware doesn't require more than one buffer to be queued
> >> > >   to function, if request N contains parameters for the camera sensor,
> >> > >   for the offline ISP, and for the JPEG encoder, those parameters need
> >> > >   to be available to libcamera three frames earlier to be applied at the
> >> > >   right time.
> >> > >
> >> > >   Note that some processing steps could in theory take more than the
> >> > >   time of one frame to complete, if they're pipelined at the hardware
> >> > >   level, or if they can be run in parallel for multiple frames (for
> >> > >   instance we could have two JPEG encoders that take the time of one
> >> > >   frame and a half to encode a frame). I'm not sure if this would impact
> >> > >   what we need to expose to applications.
> >> > >
> >> > > The question is how to translate this to requirements exposed to
> >> > > applications. Going back to the IPU3 and RPi pipeline handlers, both
> >> > > have the same hardware architecture, with a CSI-2 receiver capturing to
> >> > > memory, and an offline ISP. The hardware and drivers will operate with a
> >> > > single buffer, but the inline part will drop frames if we don't have at
> >> > > least two buffers for raw frames (which may be internal buffers if the
> >> > > application only captures YUV streams, or application-provided buffers
> >> > > if the application also captures a RAW stream). A minimum of 1 request
> >> > > will be able to capture frames in suboptimal conditions. 2 requests
> >> > > would be needed to avoid dropping frames, and 3 requests to ensure
> >> > > manual exposure time can be applied early enough (a 4th request would
> >> > > then be needed to handle the JPEG encoding, but as that's for the
> >> > > Android camera HAL implementation only, the libcamera core doesn't need
> >> > > to consider it). Note that, if IPA algorithms are available, we could
> >> > > possibly get away with less requests as the IPA could compute parameters
> >> > > earlier, but we'd be screwed if a request disables algorithms, which we
> >> > > can't really predict.
> >> > >
> >> > > Let's also note that this doesn't mean that we need to have 3 or 4
> >> > > buffers available, the buffers are only needed on the output side of the
> >> > > pipeline, later than the parameters, but we currently have no way to
> >> > > decouple buffers and controls.
>
> The question of buffers and controls is a difficult one. Are we saying
> that we expect the frames in a completed request to have applied the
> controls that were passed when the request was made, even for analogue
> gain and exposure? I know this was originally the plan, though it's
> not clear to me that this is what happens currently...
>
> I worry that this behaviour is a little awkward for applications.
> Would it mean that applications must query how many frame delays there
> are for this sensor, and create a corresponding number of buffers and
> requests? What happens if an application makes too few (for example,
> memory is a problem), do frames get dropped? That seems quite
> unhelpful. What if an application needs more buffers, as per my
> earlier example. Are all controls now delayed by being at the back of
> this long queue?
>
> I can certainly see why this sort of behaviour might be desirable, not
> least because it mimics the V4L2 request API. My own view has always
> been more towards decoupling, because sensors are such nasty
> asynchronous things (and this approach is not without problems too).
> But these are quite big questions!
>
> Best regards
> David
>
> >> > >
> >> > > What should we report in such a case ? I think we need to provide enough
> >> > > information for applications to implement the optimal case (no frame
> >> > > dropped, controls applied on the right frame, ...). We probably also
> >> > > want to let applications know what the bare minimum is in order to
> >> > > capture frames in suboptimal conditions. For instance, an application
> >> > > may want to capture frames with a very low frame rate, or even capture a
> >> > > single snapshot, and it would probably be overkill to require queuing
> >> > > more than one request in that case. That leaves open the question of how
> >> > > to deal with drivers that need more than one buffer to operate (the
> >> > > non-IOMMU case above). Do we want to support them or can we draw the
> >> > > line there (I think we should support them, unfortunately) ? Do we want
> >> > > to allocate scratch buffers in the pipeline handler to make this
> >> > > completely transparent to the application, which would result in a waste
> >> > > of memory if the application provides us with enough buffers at all
> >> > > times (and would also give us "interesting" problems to solve, such as
> >> > > deciding when to queue a scratch buffer at start time) ? In many cases
> >> > > the IPA needs to run for a few frames before the algorithms stabilize,
> >> > > which would require multiple requests to be queued anyway (a single
> >> > > snapshot use case could be implemented with a helper on top of
> >> > > libcamera), but if an application disables all algorithms and provides
> >> > > manual values for all controls, it could be feasible to capture a proper
> >> > > snapshot with a single buffer and a single request.
> >> > >
> >> > > Naush, David, your input would be welcome here, both on the explanation
> >> > > of the issue (are my understanding and explanation correct ?), and on
> >> > > the questions that follow.
> >> > >
> >> >
> >> > I think Laurent's explanation provides a comprehensive summary of what
> >> > this control might need to encompass. To provide a bit more context, for
> >> > Raspberry Pi, our CSI2 device driver (Unicam) does use a scratch buffer
> >> > if a framebuffer is not provided by the application. This scratch buffer is
> >> > 16k (single page) in size, as our hardware allows us to truncate or run in
> >> > circular buffer mode. I intentionally left allocation and control of the
> >> > scratch
> >> > buffer within the kernel driver as buffer handling is driven by the device
> >> > interrupt handler for timing guarantees. Given that Unicam writes out to
> >> > memory, and our ISP is purely a memory to memory device, I have suggested
> >> > our minimum queue depth be set to 1 for guaranteed operation.
> >> >
> >> > Having said that, there is an interesting point raised by Laurent about
> >> > manual
> >> > exposure and gain controls. I believe we have always said that these
> >> > controls don't need to be consumed by the buffer returned in the Request
> >> > that set them (i.e. the gain/exposure updates can return N frames later),
> >> > is that still correct? Of course, setting manual exposure/gain before
> >> > starting
> >> > streaming would (should!) guarantee the buffer returned in the Request
> >> > will have the correct values set.
> >> >
> >> > In my opinion, I don't see much value in tying this QueueDepth property with
> >> > sensor delays - which can and will be different per sensor. If an
> >> > application
> >> > has requested manual exposure/gain values *while in a streaming state*
> >> > it might just have to scan the metadata on each returned framebuffer until
> >> > it has the frame with the values it requested. This can be easily done in
> >> > its request/return loop without needing to know the frame delays, so no
> >> > need to adjust QueueDepth for this.
> >>
> >> This seems like it needs more discussion and those of you already involved in
> >> libcamera understand the implications better than me :).
> >>
> >> My rationale is: With this series I added the flexibility to choose the number
> >> of buffers to be allocated, but I wanted the code that doesn't care about that
> >> to keep working, by allocating an amount of buffers that just works (like the
> >> current lc-compliance tests, the internal tests, cam and qcam).
> >>
> >> So, based on your comments, how about I rename the property to
> >> RecommendedNumRequests ? This would be the number of requests recommended in
> >> order for everything to work well (both for the hardware pipeline and manually
> >> setting controls for frames). It seems more discussion needs to happen in order
> >> to determine if sensor delays should be accounted for or not, but in case they
> >> are, then another property could be created as MinNumRequests, for the real
> >> minimum number needed, although without guarantee that controls are set in time
> >> and no frames are dropped.
> >
> >
> > This may be possible, but probably not labelled as RecommendedNumRequests
> > and MinNumRequest.  If  RecommendedNumRequests were to account for sensor
> > delays, the property value would only really apply if the application were to set a
> > manual exposure or gain (or any other delayed control).  So "Recommended" here
> > would mean recommended only if/when you provide some manual controls  :-)
> >
> >>
> >>
> >> Regarding the sensor delay, I wonder about the possibility of the pipeline
> >> handlers finding out the sensor delay based on the Model property (possibly in a
> >> "database" that can be reused by all pipeline handlers). That way, during setup
> >> the pipeline handler would export its MinNumRequests property based on a
> >> hardcoded value since it knows the hardware requirements, and then
> >> MinNumRequests + SensorDelay (or max() them? Not sure) to get
> >> RecommendedNumRequests and export it.
> >
> >
> > Raspberry Pi uses our CamHelper database for exactly this.  There is also the
> > libcamera SensorDatabase that is building up functionality and would eventually
> > replace CamHelper, where we would include the appropriate delay values.
> >
> > Regards,
> > Naush
> >
> >>
> >> Thanks,
> >> Nícolas
> >>
> >> >
> >> > Regards,
> >> > Naush
> >> >
> >> >
> >> >
> >> > >
> >> > > Nícolas, don't let this scare you, you don't have to solve this issue as
> >> > > part of this patch series :-) We can merge it as a first step, and
> >> > > continue working on top. I would however like this patch to define the
> >> > > QueueDepth property a bit better. Based on the above, you can decide
> >> > > what you want to represent with QueueDepth, and document and name it
> >> > > accordingly. If you're not sure, we can of course discuss it.

So, I gave the whole thread another good read, and it seems the one common
denominator is having the bare minimum of requests required by hardware, even if
it means dropping frames. There are use cases that don't care about dropping
frames, so it makes sense to set the bar this low.
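
For reference, a minimal sketch of how an application (or lc-compliance) could
consume the property to size its request pool, assuming it keeps the QueueDepth
name; the fallback value is arbitrary and the ControlList accessors may differ
between libcamera versions:

#include <algorithm>
#include <cstdint>

#include <libcamera/camera.h>
#include <libcamera/property_ids.h>

using namespace libcamera;

/*
 * Allocate at least as many requests as the pipeline's reported minimum,
 * falling back to an arbitrary default when the property is absent.
 */
static unsigned int numRequestsToAllocate(const Camera *camera)
{
	const ControlList &props = camera->properties();

	if (props.contains(properties::QueueDepth))
		return std::max<int32_t>(props.get(properties::QueueDepth), 1);

	return 4;
}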

Whether other properties will later be added as well, for the number of requests
needed to operate without dropping frames and/or to have manual controls applied
on specific frames (which seems to be highly dependent on the user's setup),
still needs more discussion.

So I'll go ahead and make the changes for v4 to make the purpose of this new
property clearer.

Thanks,
Nícolas

> >> > >
> >> > > > +
> >> > > >    #
> >> > > ----------------------------------------------------------------------------
> >> > > >    # Draft properties section
> >> > > >
> >> > >
> >> > > --
> >> > > Regards,
> >> > >
> >> > > Laurent Pinchart
> >> > >
> >>
> >>
>

Patch
diff mbox series

diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp b/src/libcamera/pipeline/ipu3/ipu3.cpp
index 51446fcf5bc1..6067db2f37a3 100644
--- a/src/libcamera/pipeline/ipu3/ipu3.cpp
+++ b/src/libcamera/pipeline/ipu3/ipu3.cpp
@@ -1049,6 +1049,10 @@  int PipelineHandlerIPU3::registerCameras()
 		/* Initialize the camera properties. */
 		data->properties_ = cio2->sensor()->properties();
 
+		/* TODO This can be changed to CIO2 after configuration, but
+		 * both are 4 currently */
+		data->properties_.set(properties::QueueDepth, 4);
+
 		ret = initControls(data.get());
 		if (ret)
 			continue;
diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
index 2a917455500f..8d1ade3a4352 100644
--- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
+++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
@@ -1035,6 +1035,10 @@  bool PipelineHandlerRPi::match(DeviceEnumerator *enumerator)
 	/* Initialize the camera properties. */
 	data->properties_ = data->sensor_->properties();
 
+	/* TODO Can be 1, 2 or 4 depending on configuration, for now use the max
+	 * which is 4 */
+	data->properties_.set(properties::QueueDepth, 4);
+
 	/*
 	 * Set a default value for the ScalerCropMaximum property to show
 	 * that we support its use, however, initialise it to zero because
diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
index 549f4a4e61a8..7d876e9387d7 100644
--- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp
+++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp
@@ -21,6 +21,7 @@ 
 #include <libcamera/ipa/core_ipa_interface.h>
 #include <libcamera/ipa/rkisp1_ipa_interface.h>
 #include <libcamera/ipa/rkisp1_ipa_proxy.h>
+#include <libcamera/property_ids.h>
 #include <libcamera/request.h>
 #include <libcamera/stream.h>
 
@@ -940,6 +941,7 @@  int PipelineHandlerRkISP1::createCamera(MediaEntity *sensor)
 
 	/* Initialize the camera properties. */
 	data->properties_ = data->sensor_->properties();
+	data->properties_.set(properties::QueueDepth, RKISP1_BUFFER_COUNT);
 
 	/*
 	 * \todo Read dealy values from the sensor itself or from a
diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
index 3b3e37d258d0..7540dd41ad84 100644
--- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h
+++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h
@@ -26,6 +26,8 @@  class V4L2Subdevice;
 struct StreamConfiguration;
 struct V4L2SubdeviceFormat;
 
+static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
+
 class RkISP1Path
 {
 public:
@@ -56,8 +58,6 @@  public:
 	Signal<FrameBuffer *> &bufferReady() { return video_->bufferReady; }
 
 private:
-	static constexpr unsigned int RKISP1_BUFFER_COUNT = 4;
-
 	const char *name_;
 	bool running_;
 
diff --git a/src/libcamera/pipeline/simple/simple.cpp b/src/libcamera/pipeline/simple/simple.cpp
index f6095d38e97a..6ee24f2f14e8 100644
--- a/src/libcamera/pipeline/simple/simple.cpp
+++ b/src/libcamera/pipeline/simple/simple.cpp
@@ -22,6 +22,7 @@ 
 #include <linux/media-bus-format.h>
 
 #include <libcamera/camera.h>
+#include <libcamera/property_ids.h>
 #include <libcamera/request.h>
 #include <libcamera/stream.h>
 
@@ -141,6 +142,8 @@  static const SimplePipelineInfo supportedDevices[] = {
 
 } /* namespace */
 
+static constexpr unsigned int kNumInternalBuffers = 3;
+
 class SimpleCameraData : public CameraData
 {
 public:
@@ -238,8 +241,6 @@  protected:
 	int queueRequestDevice(Camera *camera, Request *request) override;
 
 private:
-	static constexpr unsigned int kNumInternalBuffers = 3;
-
 	SimpleCameraData *cameraData(const Camera *camera)
 	{
 		return static_cast<SimpleCameraData *>(
@@ -424,6 +425,7 @@  int SimpleCameraData::init()
 	}
 
 	properties_ = sensor_->properties();
+	properties_.set(properties::QueueDepth, kNumInternalBuffers);
 
 	return 0;
 }
diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
index b6c6ade5ebaf..591f46b60d23 100644
--- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
+++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp
@@ -525,6 +525,8 @@  int UVCCameraData::init(MediaDevice *media)
 	properties_.set(properties::PixelArraySize, resolution);
 	properties_.set(properties::PixelArrayActiveAreas, { Rectangle(resolution) });
 
+	properties_.set(properties::QueueDepth, 4);
+
 	/* Initialise the supported controls. */
 	ControlInfoMap::Map ctrls;
 
diff --git a/src/libcamera/pipeline/vimc/vimc.cpp b/src/libcamera/pipeline/vimc/vimc.cpp
index 8f5f4ba30953..605b3fe89152 100644
--- a/src/libcamera/pipeline/vimc/vimc.cpp
+++ b/src/libcamera/pipeline/vimc/vimc.cpp
@@ -20,6 +20,7 @@ 
 #include <libcamera/formats.h>
 #include <libcamera/ipa/ipa_interface.h>
 #include <libcamera/ipa/ipa_module_info.h>
+#include <libcamera/property_ids.h>
 #include <libcamera/request.h>
 #include <libcamera/stream.h>
 
@@ -516,6 +517,8 @@  int VimcCameraData::init()
 	/* Initialize the camera properties. */
 	properties_ = sensor_->properties();
 
+	properties_.set(properties::QueueDepth, 4);
+
 	return 0;
 }
 
diff --git a/src/libcamera/property_ids.yaml b/src/libcamera/property_ids.yaml
index 104e9aaf4fa3..0b7d1271a26b 100644
--- a/src/libcamera/property_ids.yaml
+++ b/src/libcamera/property_ids.yaml
@@ -678,6 +678,11 @@  controls:
         \todo Turn this property into a "maximum control value" for the
         ScalerCrop control once "dynamic" controls have been implemented.
 
+  - QueueDepth:
+      type: int32_t
+      description: |
+        Minimum amount of requests needed in the camera pipeline.
+
   # ----------------------------------------------------------------------------
   # Draft properties section