[libcamera-devel,2/2] android: CameraDevice: Fix Camera3RequestDescriptor leakage
diff mbox series

Message ID 20210325111357.3862847-2-hiroh@chromium.org
State Superseded
Headers show
Series
  • [libcamera-devel,1/2] android: CameraDevice: Mark getResultMetadata() const function
Related show

Commit Message

Hirokazu Honda March 25, 2021, 11:13 a.m. UTC
CameraDevice creates Camera3RequestDescriptor in
processCaptureRequest() and disallocates in requestComplete().
Camera3RequestDescriptor can never be destroyed if
requestComplete() is never called. This avoid the memory
leakage by storing them in map CamerarRequestDescriptor.

Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
---
 src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
 src/android/camera_device.h   |  6 +++-
 src/android/camera_worker.cpp |  4 +--
 src/android/camera_worker.h   |  2 +-
 4 files changed, 44 insertions(+), 35 deletions(-)

--
2.31.0.291.g576ba9dcdaf-goog

Comments

Jacopo Mondi March 25, 2021, 4:02 p.m. UTC | #1
Hi Hiro

On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> CameraDevice creates Camera3RequestDescriptor in
> processCaptureRequest() and disallocates in requestComplete().
> Camera3RequestDescriptor can never be destroyed if
> requestComplete() is never called. This avoid the memory
> leakage by storing them in map CamerarRequestDescriptor.

Very nice, I'm not particularly proud of this part of the
implementation as it is quite hard to follow, this makes it easier
indeed

One question below

>
> Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> ---
>  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
>  src/android/camera_device.h   |  6 +++-
>  src/android/camera_worker.cpp |  4 +--
>  src/android/camera_worker.h   |  2 +-
>  4 files changed, 44 insertions(+), 35 deletions(-)
>
> diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> index ae693664..50b017a3 100644
> --- a/src/android/camera_device.cpp
> +++ b/src/android/camera_device.cpp
> @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
>
>  	/*
>  	 * Create the libcamera::Request unique_ptr<> to tie its lifetime
> -	 * to the descriptor's one. Set the descriptor's address as the
> -	 * request's cookie to retrieve it at completion time.
> +	 * to the descriptor's one.
>  	 */
> -	request_ = std::make_unique<CaptureRequest>(camera,
> -						    reinterpret_cast<uint64_t>(this));
> +	request_ = std::make_unique<CaptureRequest>(camera);
>  }
>
> +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
>  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
>
> +CameraDevice::Camera3RequestDescriptor&
> +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> +
>  /*
>   * \class CameraDevice
>   *
> @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
>  	 * The descriptor and the associated memory reserved here are freed
>  	 * at request complete time.
>  	 */
> -	Camera3RequestDescriptor *descriptor =
> -		new Camera3RequestDescriptor(camera_.get(), camera3Request);
> +	Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> +
>  	/*
>  	 * \todo The Android request model is incremental, settings passed in
>  	 * previous requests are to be effective until overridden explicitly in
> @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
>  	if (camera3Request->settings)
>  		lastSettings_ = camera3Request->settings;
>  	else
> -		descriptor->settings_ = lastSettings_;
> +		descriptor.settings_ = lastSettings_;
>
> -	LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> -			<< " with " << descriptor->buffers_.size() << " streams";
> -	for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> -		const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> +	LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> +			<< " with " << descriptor.buffers_.size() << " streams";
> +	for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> +		const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
>  		camera3_stream *camera3Stream = camera3Buffer.stream;
>  		CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
>
> @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
>  			 * lifetime management only.
>  			 */
>  			buffer = createFrameBuffer(*camera3Buffer.buffer);
> -			descriptor->frameBuffers_.emplace_back(buffer);
> +			descriptor.frameBuffers_.emplace_back(buffer);
>  			LOG(HAL, Debug) << ss.str() << " (direct)";
>  			break;
>
> @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
>
>  		if (!buffer) {
>  			LOG(HAL, Error) << "Failed to create buffer";
> -			delete descriptor;
>  			return -ENOMEM;
>  		}
>
> -		descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> +		descriptor.request_->addBuffer(cameraStream->stream(), buffer,
>  						camera3Buffer.acquire_fence);
>  	}
>
> @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
>  	 * Translate controls from Android to libcamera and queue the request
>  	 * to the CameraWorker thread.
>  	 */
> -	int ret = processControls(descriptor);
> +	int ret = processControls(&descriptor);
>  	if (ret)
>  		return ret;
>
> -	worker_.queueRequest(descriptor->request_.get());
> +	worker_.queueRequest(descriptor.request_.get());
> +	descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
>
>  	return 0;
>  }
> @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
>  	const Request::BufferMap &buffers = request->buffers();
>  	camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
>  	std::unique_ptr<CameraMetadata> resultMetadata;
> -	Camera3RequestDescriptor *descriptor =
> -		reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> +	auto node = descriptors_.extract(request->cookie());

So once node goes out of scope the mapped descriptor gets deleted.
Very nice.

However, shouldn't descriptors_ be cleared at stop() time or when a
new stream configuration is requested ?

> +	if (node.empty()) {
> +		LOG(HAL, Error) << "Unknown request: " << request->cookie();
> +		status = CAMERA3_BUFFER_STATUS_ERROR;
> +		return;
> +	}
> +	Camera3RequestDescriptor& descriptor = node.mapped();
>
>  	if (request->status() != Request::RequestComplete) {
>  		LOG(HAL, Error) << "Request not successfully completed: "
> @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
>  	}
>
>  	LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> -			<< descriptor->buffers_.size() << " streams";
> +			<< descriptor.buffers_.size() << " streams";
>
>  	/*
>  	 * \todo The timestamp used for the metadata is currently always taken
> @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
>  	 * pipeline handlers) timestamp in the Request itself.
>  	 */
>  	uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> -	resultMetadata = getResultMetadata(*descriptor, timestamp);
> +	resultMetadata = getResultMetadata(descriptor, timestamp);
>
>  	/* Handle any JPEG compression. */
> -	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> +	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
>  		CameraStream *cameraStream =
>  			static_cast<CameraStream *>(buffer.stream->priv);
>
> @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
>
>  		int ret = cameraStream->process(*src,
>  						*buffer.buffer,
> -						descriptor->settings_,
> +						descriptor.settings_,
>  						resultMetadata.get());
>  		if (ret) {
>  			status = CAMERA3_BUFFER_STATUS_ERROR;
> @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
>
>  	/* Prepare to call back the Android camera stack. */
>  	camera3_capture_result_t captureResult = {};
> -	captureResult.frame_number = descriptor->frameNumber_;
> -	captureResult.num_output_buffers = descriptor->buffers_.size();
> -	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> +	captureResult.frame_number = descriptor.frameNumber_;
> +	captureResult.num_output_buffers = descriptor.buffers_.size();
> +	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
>  		buffer.acquire_fence = -1;
>  		buffer.release_fence = -1;
>  		buffer.status = status;
>  	}
> -	captureResult.output_buffers = descriptor->buffers_.data();
> +	captureResult.output_buffers = descriptor.buffers_.data();
>
>  	if (status == CAMERA3_BUFFER_STATUS_OK) {
> -		notifyShutter(descriptor->frameNumber_, timestamp);
> +		notifyShutter(descriptor.frameNumber_, timestamp);
>
>  		captureResult.partial_result = 1;
>  		captureResult.result = resultMetadata->get();
> @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
>  		 * is here signalled. Make sure the error path plays well with
>  		 * the camera stack state machine.
>  		 */
> -		notifyError(descriptor->frameNumber_,
> -			    descriptor->buffers_[0].stream);
> +		notifyError(descriptor.frameNumber_,
> +			    descriptor.buffers_[0].stream);
>  	}
>
>  	callbacks_->process_capture_result(callbacks_, &captureResult);
> -
> -	delete descriptor;
>  }
>
>  std::string CameraDevice::logPrefix() const
> diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> index 11bdfec8..54d7c319 100644
> --- a/src/android/camera_device.h
> +++ b/src/android/camera_device.h
> @@ -69,9 +69,11 @@ private:
>  	CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
>
>  	struct Camera3RequestDescriptor {
> +		Camera3RequestDescriptor();
> +		~Camera3RequestDescriptor();
>  		Camera3RequestDescriptor(libcamera::Camera *camera,
>  					 const camera3_capture_request_t *camera3Request);
> -		~Camera3RequestDescriptor();
> +		Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
>
>  		uint32_t frameNumber_;
>  		std::vector<camera3_stream_buffer_t> buffers_;
> @@ -122,6 +124,8 @@ private:
>  	std::map<int, libcamera::PixelFormat> formatsMap_;
>  	std::vector<CameraStream> streams_;
>
> +	std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> +
>  	std::string maker_;
>  	std::string model_;
>
> diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> index df436460..300ddde0 100644
> --- a/src/android/camera_worker.cpp
> +++ b/src/android/camera_worker.cpp
> @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
>   * by the CameraWorker which queues it to the libcamera::Camera after handling
>   * fences.
>   */
> -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
>  	: camera_(camera)
>  {
> -	request_ = camera_->createRequest(cookie);
> +	request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
>  }
>
>  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> index d7060576..64b1658b 100644
> --- a/src/android/camera_worker.h
> +++ b/src/android/camera_worker.h
> @@ -22,7 +22,7 @@ class CameraDevice;
>  class CaptureRequest
>  {
>  public:
> -	CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> +	CaptureRequest(libcamera::Camera *camera);
>
>  	const std::vector<int> &fences() const { return acquireFences_; }
>  	libcamera::ControlList &controls() { return request_->controls(); }
> --
> 2.31.0.291.g576ba9dcdaf-goog
> _______________________________________________
> libcamera-devel mailing list
> libcamera-devel@lists.libcamera.org
> https://lists.libcamera.org/listinfo/libcamera-devel
Laurent Pinchart March 25, 2021, 5:21 p.m. UTC | #2
Hi Hiro,

Thank you for the patch, it's nice to see the HAL being cleaned up :-)

On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > CameraDevice creates Camera3RequestDescriptor in
> > processCaptureRequest() and disallocates in requestComplete().
> > Camera3RequestDescriptor can never be destroyed if
> > requestComplete() is never called. This avoid the memory
> > leakage by storing them in map CamerarRequestDescriptor.

s/CamerarRequestDescriptor/CameraRequestDescriptor/

When does this happen ? You've reported issues with the IPU3 pipeline
handler failing to queue a request due to fact that it has no internal
queue of requests waiting for hardware resources to be available. Is
that what you're addressing here, or is there something else ? The
libcamera core should guarantee that requestComplete() gets called for
all requests successfully queued, so if there are other leakages, I'd
like to investigate them.

> Very nice, I'm not particularly proud of this part of the
> implementation as it is quite hard to follow, this makes it easier
> indeed
> 
> One question below
> 
> >
> > Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> > ---
> >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> >  src/android/camera_device.h   |  6 +++-
> >  src/android/camera_worker.cpp |  4 +--
> >  src/android/camera_worker.h   |  2 +-
> >  4 files changed, 44 insertions(+), 35 deletions(-)
> >
> > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > index ae693664..50b017a3 100644
> > --- a/src/android/camera_device.cpp
> > +++ b/src/android/camera_device.cpp
> > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> >
> >  	/*
> >  	 * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > -	 * to the descriptor's one. Set the descriptor's address as the
> > -	 * request's cookie to retrieve it at completion time.
> > +	 * to the descriptor's one.
> >  	 */
> > -	request_ = std::make_unique<CaptureRequest>(camera,
> > -						    reinterpret_cast<uint64_t>(this));
> > +	request_ = std::make_unique<CaptureRequest>(camera);
> >  }
> >
> > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> >
> > +CameraDevice::Camera3RequestDescriptor&
> > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > +
> >  /*
> >   * \class CameraDevice
> >   *
> > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  	 * The descriptor and the associated memory reserved here are freed
> >  	 * at request complete time.
> >  	 */
> > -	Camera3RequestDescriptor *descriptor =
> > -		new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > +	Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > +
> >  	/*
> >  	 * \todo The Android request model is incremental, settings passed in
> >  	 * previous requests are to be effective until overridden explicitly in
> > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  	if (camera3Request->settings)
> >  		lastSettings_ = camera3Request->settings;
> >  	else
> > -		descriptor->settings_ = lastSettings_;
> > +		descriptor.settings_ = lastSettings_;
> >
> > -	LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > -			<< " with " << descriptor->buffers_.size() << " streams";
> > -	for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > -		const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > +	LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > +			<< " with " << descriptor.buffers_.size() << " streams";
> > +	for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > +		const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> >  		camera3_stream *camera3Stream = camera3Buffer.stream;
> >  		CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> >
> > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  			 * lifetime management only.
> >  			 */
> >  			buffer = createFrameBuffer(*camera3Buffer.buffer);
> > -			descriptor->frameBuffers_.emplace_back(buffer);
> > +			descriptor.frameBuffers_.emplace_back(buffer);
> >  			LOG(HAL, Debug) << ss.str() << " (direct)";
> >  			break;
> >
> > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >
> >  		if (!buffer) {
> >  			LOG(HAL, Error) << "Failed to create buffer";
> > -			delete descriptor;
> >  			return -ENOMEM;
> >  		}
> >
> > -		descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > +		descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> >  						camera3Buffer.acquire_fence);
> >  	}
> >
> > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  	 * Translate controls from Android to libcamera and queue the request
> >  	 * to the CameraWorker thread.
> >  	 */
> > -	int ret = processControls(descriptor);
> > +	int ret = processControls(&descriptor);
> >  	if (ret)
> >  		return ret;
> >
> > -	worker_.queueRequest(descriptor->request_.get());
> > +	worker_.queueRequest(descriptor.request_.get());
> > +	descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> >
> >  	return 0;
> >  }
> > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> >  	const Request::BufferMap &buffers = request->buffers();
> >  	camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> >  	std::unique_ptr<CameraMetadata> resultMetadata;
> > -	Camera3RequestDescriptor *descriptor =
> > -		reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > +	auto node = descriptors_.extract(request->cookie());
> 
> So once node goes out of scope the mapped descriptor gets deleted.
> Very nice.
> 
> However, shouldn't descriptors_ be cleared at stop() time or when a
> new stream configuration is requested ?

I'm wondering if we shouldn't go one step forward. I'll assume here that
this patch is dealing with failures to queue requests when calling
Camera::queueRequest(). If there are other issues, the comments below
may not apply to them.

When queuing a request to libcamera, we call
CameraWorker::queueRequest(). It's a void function that invokes
Worker::processRequest() as a queued call, so it returns immediately,
and we can't know if Worker::processRequest() will fail or not. We have
thus sent the request onto a journey, hoping it will be successfully
queued. There can be multiple reasons that won't be the case:

- CameraWorker::Worker::processRequest() failing to wait for a fence.
  Oops, shouldn't happen, but we still need to handle this gracefully.
  We're still in the HAL here, the request hasn't reached libcamera. The
  HAL should thus handle the failure. This is case number 1.

- CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
  which calls Camera::queueRequest(). That function can return an error
  if some of the sanity checks fail. We've reached libcamera, but it has
  failed synchronously, so the HAL should handle the failure. Still case
  number 1.

- At this point, the request is sent to PipelineHandler::queueRequest(),
  asynchronously again. That function doesn't perform any sanity check
  (but it could in the future), and queues the request to the pipeline
  handler with a call to PipelineHandler:queueRequestDevice(),
  synchronously. This call can fail. If the future sanity checks in
  PipelineHandler::queueRequest() or
  PipelineHandler:queueRequestDevice() fail, the request belongs to the
  libcamera core (the HAL has queued it successfully, and the pipeline
  handler hasn't accepted it). This is case number 2, and I believe this
  is the one that has triggered this patch.

- The pipeline handler has accepted the request, but something fails
  later. That's case number 3.

Now let's look at those three cases, in reverse order.

3. The request belongs to the pipeline handler, which must complete it.
We have support in the PipelineHandler base class to accept completion
of requests out of sequence, and reorder the completion events to the
HAL to guarantee they will be issued in sequence. The pipeline handler
can thus complete the request with an error state as soon as it detects
the error, and the HAL will eventually receive a requestComplete() call.
As far as I can tell, this is working correctly.

2. This is embarassing, we don't handle this at all.
PipelineHandler::queueRequest() returns an error code, but it never
reaches Camera::queueReuest() as the call to
PipelineHandler::queueRequest() is asynchronous. We should make
PipelineHandler::queueRequest() a void function to be less misleading,
and PipelineHandler::queueRequest() needs to signal request completion
in that case. I think we can reuse the same request completion ordering
mechanism and immediately mark the request as complete (with an error)
in PipelineHandler::queueRequest() in that case. It should be an easy
fix (famous last words... :-)).

1. This is the responsibility of the HAL, which should signal request
completion to the camera service. Depending on whether the camera
service can support out-of-order completion or not, we can signal
completion immediately, or need to keep the request in a queue and
complete it at the appropriate time.

As far as I understand, this patch is meant to address the memory leak
related to case 2, and also addresses as a consequence the leak related
to case 1 (case 3 should be working correctly already, if I'm not
mistaken). For case 2 I think the correct fix should be to ensure that
libcamera will complete the request, as explained above. For case 1, we
need a specific fix in the HAL, not only for the memory leak, but to
also complete the request towards the camera service.

Is this analysis correct, or am I missing something ?

> > +	if (node.empty()) {
> > +		LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > +		status = CAMERA3_BUFFER_STATUS_ERROR;
> > +		return;
> > +	}
> > +	Camera3RequestDescriptor& descriptor = node.mapped();
> >
> >  	if (request->status() != Request::RequestComplete) {
> >  		LOG(HAL, Error) << "Request not successfully completed: "
> > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> >  	}
> >
> >  	LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > -			<< descriptor->buffers_.size() << " streams";
> > +			<< descriptor.buffers_.size() << " streams";
> >
> >  	/*
> >  	 * \todo The timestamp used for the metadata is currently always taken
> > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> >  	 * pipeline handlers) timestamp in the Request itself.
> >  	 */
> >  	uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > -	resultMetadata = getResultMetadata(*descriptor, timestamp);
> > +	resultMetadata = getResultMetadata(descriptor, timestamp);
> >
> >  	/* Handle any JPEG compression. */
> > -	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > +	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> >  		CameraStream *cameraStream =
> >  			static_cast<CameraStream *>(buffer.stream->priv);
> >
> > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> >
> >  		int ret = cameraStream->process(*src,
> >  						*buffer.buffer,
> > -						descriptor->settings_,
> > +						descriptor.settings_,
> >  						resultMetadata.get());
> >  		if (ret) {
> >  			status = CAMERA3_BUFFER_STATUS_ERROR;
> > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> >
> >  	/* Prepare to call back the Android camera stack. */
> >  	camera3_capture_result_t captureResult = {};
> > -	captureResult.frame_number = descriptor->frameNumber_;
> > -	captureResult.num_output_buffers = descriptor->buffers_.size();
> > -	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > +	captureResult.frame_number = descriptor.frameNumber_;
> > +	captureResult.num_output_buffers = descriptor.buffers_.size();
> > +	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> >  		buffer.acquire_fence = -1;
> >  		buffer.release_fence = -1;
> >  		buffer.status = status;
> >  	}
> > -	captureResult.output_buffers = descriptor->buffers_.data();
> > +	captureResult.output_buffers = descriptor.buffers_.data();
> >
> >  	if (status == CAMERA3_BUFFER_STATUS_OK) {
> > -		notifyShutter(descriptor->frameNumber_, timestamp);
> > +		notifyShutter(descriptor.frameNumber_, timestamp);
> >
> >  		captureResult.partial_result = 1;
> >  		captureResult.result = resultMetadata->get();
> > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> >  		 * is here signalled. Make sure the error path plays well with
> >  		 * the camera stack state machine.
> >  		 */
> > -		notifyError(descriptor->frameNumber_,
> > -			    descriptor->buffers_[0].stream);
> > +		notifyError(descriptor.frameNumber_,
> > +			    descriptor.buffers_[0].stream);
> >  	}
> >
> >  	callbacks_->process_capture_result(callbacks_, &captureResult);
> > -
> > -	delete descriptor;
> >  }
> >
> >  std::string CameraDevice::logPrefix() const
> > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > index 11bdfec8..54d7c319 100644
> > --- a/src/android/camera_device.h
> > +++ b/src/android/camera_device.h
> > @@ -69,9 +69,11 @@ private:
> >  	CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> >
> >  	struct Camera3RequestDescriptor {
> > +		Camera3RequestDescriptor();
> > +		~Camera3RequestDescriptor();
> >  		Camera3RequestDescriptor(libcamera::Camera *camera,
> >  					 const camera3_capture_request_t *camera3Request);
> > -		~Camera3RequestDescriptor();
> > +		Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> >
> >  		uint32_t frameNumber_;
> >  		std::vector<camera3_stream_buffer_t> buffers_;
> > @@ -122,6 +124,8 @@ private:
> >  	std::map<int, libcamera::PixelFormat> formatsMap_;
> >  	std::vector<CameraStream> streams_;
> >
> > +	std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > +
> >  	std::string maker_;
> >  	std::string model_;
> >
> > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > index df436460..300ddde0 100644
> > --- a/src/android/camera_worker.cpp
> > +++ b/src/android/camera_worker.cpp
> > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> >   * fences.
> >   */
> > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> >  	: camera_(camera)
> >  {
> > -	request_ = camera_->createRequest(cookie);
> > +	request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> >  }
> >
> >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > index d7060576..64b1658b 100644
> > --- a/src/android/camera_worker.h
> > +++ b/src/android/camera_worker.h
> > @@ -22,7 +22,7 @@ class CameraDevice;
> >  class CaptureRequest
> >  {
> >  public:
> > -	CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > +	CaptureRequest(libcamera::Camera *camera);
> >
> >  	const std::vector<int> &fences() const { return acquireFences_; }
> >  	libcamera::ControlList &controls() { return request_->controls(); }
Hirokazu Honda March 28, 2021, 10:10 p.m. UTC | #3
Hi Jacopo and Laurent, thanks for reviewing.

On Fri, Mar 26, 2021 at 2:22 AM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Hiro,
>
> Thank you for the patch, it's nice to see the HAL being cleaned up :-)
>
> On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> > On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > > CameraDevice creates Camera3RequestDescriptor in
> > > processCaptureRequest() and disallocates in requestComplete().
> > > Camera3RequestDescriptor can never be destroyed if
> > > requestComplete() is never called. This avoid the memory
> > > leakage by storing them in map CamerarRequestDescriptor.
>
> s/CamerarRequestDescriptor/CameraRequestDescriptor/
>
> When does this happen ? You've reported issues with the IPU3 pipeline
> handler failing to queue a request due to fact that it has no internal
> queue of requests waiting for hardware resources to be available. Is
> that what you're addressing here, or is there something else ? The
> libcamera core should guarantee that requestComplete() gets called for
> all requests successfully queued, so if there are other leakages, I'd
> like to investigate them.
>

Since a libcamera process is likely alive as long as a system runs, I
would like to avoid any potential leakage as much as possible.
Regardless of the issue I've reported, I consider it is good to
guarantee the descriptor is destroyed at the latest in CameraDevice
descriptor.
As you said, requestComplete() should be returned and I haven't found
any other issues of not invoking it than the issue I reported, but I
think it isn't theoretically ensured.

> > Very nice, I'm not particularly proud of this part of the
> > implementation as it is quite hard to follow, this makes it easier
> > indeed
> >
> > One question below
> >
> > >
> > > Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> > > ---
> > >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> > >  src/android/camera_device.h   |  6 +++-
> > >  src/android/camera_worker.cpp |  4 +--
> > >  src/android/camera_worker.h   |  2 +-
> > >  4 files changed, 44 insertions(+), 35 deletions(-)
> > >
> > > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > > index ae693664..50b017a3 100644
> > > --- a/src/android/camera_device.cpp
> > > +++ b/src/android/camera_device.cpp
> > > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> > >
> > >     /*
> > >      * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > > -    * to the descriptor's one. Set the descriptor's address as the
> > > -    * request's cookie to retrieve it at completion time.
> > > +    * to the descriptor's one.
> > >      */
> > > -   request_ = std::make_unique<CaptureRequest>(camera,
> > > -                                               reinterpret_cast<uint64_t>(this));
> > > +   request_ = std::make_unique<CaptureRequest>(camera);
> > >  }
> > >
> > > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> > >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> > >
> > > +CameraDevice::Camera3RequestDescriptor&
> > > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > > +
> > >  /*
> > >   * \class CameraDevice
> > >   *
> > > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > >      * The descriptor and the associated memory reserved here are freed
> > >      * at request complete time.
> > >      */
> > > -   Camera3RequestDescriptor *descriptor =
> > > -           new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > > +   Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > > +
> > >     /*
> > >      * \todo The Android request model is incremental, settings passed in
> > >      * previous requests are to be effective until overridden explicitly in
> > > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > >     if (camera3Request->settings)
> > >             lastSettings_ = camera3Request->settings;
> > >     else
> > > -           descriptor->settings_ = lastSettings_;
> > > +           descriptor.settings_ = lastSettings_;
> > >
> > > -   LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > > -                   << " with " << descriptor->buffers_.size() << " streams";
> > > -   for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > > -           const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > > +   LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > > +                   << " with " << descriptor.buffers_.size() << " streams";
> > > +   for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > > +           const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> > >             camera3_stream *camera3Stream = camera3Buffer.stream;
> > >             CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> > >
> > > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > >                      * lifetime management only.
> > >                      */
> > >                     buffer = createFrameBuffer(*camera3Buffer.buffer);
> > > -                   descriptor->frameBuffers_.emplace_back(buffer);
> > > +                   descriptor.frameBuffers_.emplace_back(buffer);
> > >                     LOG(HAL, Debug) << ss.str() << " (direct)";
> > >                     break;
> > >
> > > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > >
> > >             if (!buffer) {
> > >                     LOG(HAL, Error) << "Failed to create buffer";
> > > -                   delete descriptor;
> > >                     return -ENOMEM;
> > >             }
> > >
> > > -           descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > > +           descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> > >                                             camera3Buffer.acquire_fence);
> > >     }
> > >
> > > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > >      * Translate controls from Android to libcamera and queue the request
> > >      * to the CameraWorker thread.
> > >      */
> > > -   int ret = processControls(descriptor);
> > > +   int ret = processControls(&descriptor);
> > >     if (ret)
> > >             return ret;
> > >
> > > -   worker_.queueRequest(descriptor->request_.get());
> > > +   worker_.queueRequest(descriptor.request_.get());
> > > +   descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> > >
> > >     return 0;
> > >  }
> > > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> > >     const Request::BufferMap &buffers = request->buffers();
> > >     camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> > >     std::unique_ptr<CameraMetadata> resultMetadata;
> > > -   Camera3RequestDescriptor *descriptor =
> > > -           reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > > +   auto node = descriptors_.extract(request->cookie());
> >
> > So once node goes out of scope the mapped descriptor gets deleted.
> > Very nice.
> >
> > However, shouldn't descriptors_ be cleared at stop() time or when a
> > new stream configuration is requested ?
>

descriptors_ is cleared thanks to std::map destructor, isn't it?
I think it is also true to ought to clear when a new stream
configuration is requested from the following camera3 API statement,
>  Add signal_stream_flush() to camera3_device_ops_t for camera service to notify HAL an
>  upcoming configure_streams() call requires HAL to return buffers of certain streams.
https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/camera3.h#194

> I'm wondering if we shouldn't go one step forward. I'll assume here that
> this patch is dealing with failures to queue requests when calling
> Camera::queueRequest(). If there are other issues, the comments below
> may not apply to them.
>
> When queuing a request to libcamera, we call
> CameraWorker::queueRequest(). It's a void function that invokes
> Worker::processRequest() as a queued call, so it returns immediately,
> and we can't know if Worker::processRequest() will fail or not. We have
> thus sent the request onto a journey, hoping it will be successfully
> queued. There can be multiple reasons that won't be the case:
>
> - CameraWorker::Worker::processRequest() failing to wait for a fence.
>   Oops, shouldn't happen, but we still need to handle this gracefully.
>   We're still in the HAL here, the request hasn't reached libcamera. The
>   HAL should thus handle the failure. This is case number 1.
>
> - CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
>   which calls Camera::queueRequest(). That function can return an error
>   if some of the sanity checks fail. We've reached libcamera, but it has
>   failed synchronously, so the HAL should handle the failure. Still case
>   number 1.
>
> - At this point, the request is sent to PipelineHandler::queueRequest(),
>   asynchronously again. That function doesn't perform any sanity check
>   (but it could in the future), and queues the request to the pipeline
>   handler with a call to PipelineHandler:queueRequestDevice(),
>   synchronously. This call can fail. If the future sanity checks in
>   PipelineHandler::queueRequest() or
>   PipelineHandler:queueRequestDevice() fail, the request belongs to the
>   libcamera core (the HAL has queued it successfully, and the pipeline
>   handler hasn't accepted it). This is case number 2, and I believe this
>   is the one that has triggered this patch.
>
> - The pipeline handler has accepted the request, but something fails
>   later. That's case number 3.
>
> Now let's look at those three cases, in reverse order.
>
> 3. The request belongs to the pipeline handler, which must complete it.
> We have support in the PipelineHandler base class to accept completion
> of requests out of sequence, and reorder the completion events to the
> HAL to guarantee they will be issued in sequence. The pipeline handler
> can thus complete the request with an error state as soon as it detects
> the error, and the HAL will eventually receive a requestComplete() call.
> As far as I can tell, this is working correctly.
>
> 2. This is embarassing, we don't handle this at all.
> PipelineHandler::queueRequest() returns an error code, but it never
> reaches Camera::queueReuest() as the call to
> PipelineHandler::queueRequest() is asynchronous. We should make
> PipelineHandler::queueRequest() a void function to be less misleading,
> and PipelineHandler::queueRequest() needs to signal request completion
> in that case. I think we can reuse the same request completion ordering
> mechanism and immediately mark the request as complete (with an error)
> in PipelineHandler::queueRequest() in that case. It should be an easy
> fix (famous last words... :-)).
>
> 1. This is the responsibility of the HAL, which should signal request
> completion to the camera service. Depending on whether the camera
> service can support out-of-order completion or not, we can signal
> completion immediately, or need to keep the request in a queue and
> complete it at the appropriate time.
>
> As far as I understand, this patch is meant to address the memory leak
> related to case 2, and also addresses as a consequence the leak related
> to case 1 (case 3 should be working correctly already, if I'm not
> mistaken). For case 2 I think the correct fix should be to ensure that
> libcamera will complete the request, as explained above. For case 1, we
> need a specific fix in the HAL, not only for the memory leak, but to
> also complete the request towards the camera service.
>
> Is this analysis correct, or am I missing something ?
>

Thanks for analyzing the details. Yes your analysis looks correct.
Well, I expect this patch is to retain memory leakage in any cases.
I mean that no memory is leaked due to any failures.

I agree that the failure 1 should be handled properly.
The fix to failure 2 also sounds good to me.

Best Regards,
-Hiro

> > > +   if (node.empty()) {
> > > +           LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > > +           status = CAMERA3_BUFFER_STATUS_ERROR;
> > > +           return;
> > > +   }
> > > +   Camera3RequestDescriptor& descriptor = node.mapped();
> > >
> > >     if (request->status() != Request::RequestComplete) {
> > >             LOG(HAL, Error) << "Request not successfully completed: "
> > > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> > >     }
> > >
> > >     LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > > -                   << descriptor->buffers_.size() << " streams";
> > > +                   << descriptor.buffers_.size() << " streams";
> > >
> > >     /*
> > >      * \todo The timestamp used for the metadata is currently always taken
> > > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> > >      * pipeline handlers) timestamp in the Request itself.
> > >      */
> > >     uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > > -   resultMetadata = getResultMetadata(*descriptor, timestamp);
> > > +   resultMetadata = getResultMetadata(descriptor, timestamp);
> > >
> > >     /* Handle any JPEG compression. */
> > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > >             CameraStream *cameraStream =
> > >                     static_cast<CameraStream *>(buffer.stream->priv);
> > >
> > > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> > >
> > >             int ret = cameraStream->process(*src,
> > >                                             *buffer.buffer,
> > > -                                           descriptor->settings_,
> > > +                                           descriptor.settings_,
> > >                                             resultMetadata.get());
> > >             if (ret) {
> > >                     status = CAMERA3_BUFFER_STATUS_ERROR;
> > > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> > >
> > >     /* Prepare to call back the Android camera stack. */
> > >     camera3_capture_result_t captureResult = {};
> > > -   captureResult.frame_number = descriptor->frameNumber_;
> > > -   captureResult.num_output_buffers = descriptor->buffers_.size();
> > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > +   captureResult.frame_number = descriptor.frameNumber_;
> > > +   captureResult.num_output_buffers = descriptor.buffers_.size();
> > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > >             buffer.acquire_fence = -1;
> > >             buffer.release_fence = -1;
> > >             buffer.status = status;
> > >     }
> > > -   captureResult.output_buffers = descriptor->buffers_.data();
> > > +   captureResult.output_buffers = descriptor.buffers_.data();
> > >
> > >     if (status == CAMERA3_BUFFER_STATUS_OK) {
> > > -           notifyShutter(descriptor->frameNumber_, timestamp);
> > > +           notifyShutter(descriptor.frameNumber_, timestamp);
> > >
> > >             captureResult.partial_result = 1;
> > >             captureResult.result = resultMetadata->get();
> > > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> > >              * is here signalled. Make sure the error path plays well with
> > >              * the camera stack state machine.
> > >              */
> > > -           notifyError(descriptor->frameNumber_,
> > > -                       descriptor->buffers_[0].stream);
> > > +           notifyError(descriptor.frameNumber_,
> > > +                       descriptor.buffers_[0].stream);
> > >     }
> > >
> > >     callbacks_->process_capture_result(callbacks_, &captureResult);
> > > -
> > > -   delete descriptor;
> > >  }
> > >
> > >  std::string CameraDevice::logPrefix() const
> > > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > > index 11bdfec8..54d7c319 100644
> > > --- a/src/android/camera_device.h
> > > +++ b/src/android/camera_device.h
> > > @@ -69,9 +69,11 @@ private:
> > >     CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> > >
> > >     struct Camera3RequestDescriptor {
> > > +           Camera3RequestDescriptor();
> > > +           ~Camera3RequestDescriptor();
> > >             Camera3RequestDescriptor(libcamera::Camera *camera,
> > >                                      const camera3_capture_request_t *camera3Request);
> > > -           ~Camera3RequestDescriptor();
> > > +           Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> > >
> > >             uint32_t frameNumber_;
> > >             std::vector<camera3_stream_buffer_t> buffers_;
> > > @@ -122,6 +124,8 @@ private:
> > >     std::map<int, libcamera::PixelFormat> formatsMap_;
> > >     std::vector<CameraStream> streams_;
> > >
> > > +   std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > > +
> > >     std::string maker_;
> > >     std::string model_;
> > >
> > > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > > index df436460..300ddde0 100644
> > > --- a/src/android/camera_worker.cpp
> > > +++ b/src/android/camera_worker.cpp
> > > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> > >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> > >   * fences.
> > >   */
> > > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> > >     : camera_(camera)
> > >  {
> > > -   request_ = camera_->createRequest(cookie);
> > > +   request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> > >  }
> > >
> > >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > > index d7060576..64b1658b 100644
> > > --- a/src/android/camera_worker.h
> > > +++ b/src/android/camera_worker.h
> > > @@ -22,7 +22,7 @@ class CameraDevice;
> > >  class CaptureRequest
> > >  {
> > >  public:
> > > -   CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > > +   CaptureRequest(libcamera::Camera *camera);
> > >
> > >     const std::vector<int> &fences() const { return acquireFences_; }
> > >     libcamera::ControlList &controls() { return request_->controls(); }
>
> --
> Regards,
>
> Laurent Pinchart
Laurent Pinchart March 28, 2021, 10:17 p.m. UTC | #4
Hi Hiro,

On Mon, Mar 29, 2021 at 07:10:27AM +0900, Hirokazu Honda wrote:
> Hi Jacopo and Laurent, thanks for reviewing.
> 
> On Fri, Mar 26, 2021 at 2:22 AM Laurent Pinchartwrote:
> >
> > Hi Hiro,
> >
> > Thank you for the patch, it's nice to see the HAL being cleaned up :-)
> >
> > On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> > > On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > > > CameraDevice creates Camera3RequestDescriptor in
> > > > processCaptureRequest() and disallocates in requestComplete().
> > > > Camera3RequestDescriptor can never be destroyed if
> > > > requestComplete() is never called. This avoid the memory
> > > > leakage by storing them in map CamerarRequestDescriptor.
> >
> > s/CamerarRequestDescriptor/CameraRequestDescriptor/
> >
> > When does this happen ? You've reported issues with the IPU3 pipeline
> > handler failing to queue a request due to fact that it has no internal
> > queue of requests waiting for hardware resources to be available. Is
> > that what you're addressing here, or is there something else ? The
> > libcamera core should guarantee that requestComplete() gets called for
> > all requests successfully queued, so if there are other leakages, I'd
> > like to investigate them.
> 
> Since a libcamera process is likely alive as long as a system runs, I
> would like to avoid any potential leakage as much as possible.

We agree on this :-)

> Regardless of the issue I've reported, I consider it is good to
> guarantee the descriptor is destroyed at the latest in CameraDevice
> descriptor.
> As you said, requestComplete() should be returned and I haven't found
> any other issues of not invoking it than the issue I reported, but I
> think it isn't theoretically ensured.

My point was that, as this isn't supposed to happen in the first place,
I want to make sure we investigate any occurrence of the issue you may
have been able to trigger. There's one particular problem you've
reported that can lead this this leakage, and that will be addressed on
its own, and I was wondering if there were any other. You're confirming
that you haven't found any other, so I'm happy :-)

> > > Very nice, I'm not particularly proud of this part of the
> > > implementation as it is quite hard to follow, this makes it easier
> > > indeed
> > >
> > > One question below
> > >
> > > >
> > > > Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> > > > ---
> > > >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> > > >  src/android/camera_device.h   |  6 +++-
> > > >  src/android/camera_worker.cpp |  4 +--
> > > >  src/android/camera_worker.h   |  2 +-
> > > >  4 files changed, 44 insertions(+), 35 deletions(-)
> > > >
> > > > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > > > index ae693664..50b017a3 100644
> > > > --- a/src/android/camera_device.cpp
> > > > +++ b/src/android/camera_device.cpp
> > > > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> > > >
> > > >     /*
> > > >      * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > > > -    * to the descriptor's one. Set the descriptor's address as the
> > > > -    * request's cookie to retrieve it at completion time.
> > > > +    * to the descriptor's one.
> > > >      */
> > > > -   request_ = std::make_unique<CaptureRequest>(camera,
> > > > -                                               reinterpret_cast<uint64_t>(this));
> > > > +   request_ = std::make_unique<CaptureRequest>(camera);
> > > >  }
> > > >
> > > > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> > > >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> > > >
> > > > +CameraDevice::Camera3RequestDescriptor&
> > > > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > > > +
> > > >  /*
> > > >   * \class CameraDevice
> > > >   *
> > > > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > >      * The descriptor and the associated memory reserved here are freed
> > > >      * at request complete time.
> > > >      */
> > > > -   Camera3RequestDescriptor *descriptor =
> > > > -           new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > > > +   Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > > > +
> > > >     /*
> > > >      * \todo The Android request model is incremental, settings passed in
> > > >      * previous requests are to be effective until overridden explicitly in
> > > > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > >     if (camera3Request->settings)
> > > >             lastSettings_ = camera3Request->settings;
> > > >     else
> > > > -           descriptor->settings_ = lastSettings_;
> > > > +           descriptor.settings_ = lastSettings_;
> > > >
> > > > -   LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > > > -                   << " with " << descriptor->buffers_.size() << " streams";
> > > > -   for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > > > -           const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > > > +   LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > > > +                   << " with " << descriptor.buffers_.size() << " streams";
> > > > +   for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > > > +           const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> > > >             camera3_stream *camera3Stream = camera3Buffer.stream;
> > > >             CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> > > >
> > > > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > >                      * lifetime management only.
> > > >                      */
> > > >                     buffer = createFrameBuffer(*camera3Buffer.buffer);
> > > > -                   descriptor->frameBuffers_.emplace_back(buffer);
> > > > +                   descriptor.frameBuffers_.emplace_back(buffer);
> > > >                     LOG(HAL, Debug) << ss.str() << " (direct)";
> > > >                     break;
> > > >
> > > > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > >
> > > >             if (!buffer) {
> > > >                     LOG(HAL, Error) << "Failed to create buffer";
> > > > -                   delete descriptor;
> > > >                     return -ENOMEM;
> > > >             }
> > > >
> > > > -           descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > > > +           descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> > > >                                             camera3Buffer.acquire_fence);
> > > >     }
> > > >
> > > > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > >      * Translate controls from Android to libcamera and queue the request
> > > >      * to the CameraWorker thread.
> > > >      */
> > > > -   int ret = processControls(descriptor);
> > > > +   int ret = processControls(&descriptor);
> > > >     if (ret)
> > > >             return ret;
> > > >
> > > > -   worker_.queueRequest(descriptor->request_.get());
> > > > +   worker_.queueRequest(descriptor.request_.get());
> > > > +   descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> > > >
> > > >     return 0;
> > > >  }
> > > > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> > > >     const Request::BufferMap &buffers = request->buffers();
> > > >     camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> > > >     std::unique_ptr<CameraMetadata> resultMetadata;
> > > > -   Camera3RequestDescriptor *descriptor =
> > > > -           reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > > > +   auto node = descriptors_.extract(request->cookie());
> > >
> > > So once node goes out of scope the mapped descriptor gets deleted.
> > > Very nice.
> > >
> > > However, shouldn't descriptors_ be cleared at stop() time or when a
> > > new stream configuration is requested ?
> 
> descriptors_ is cleared thanks to std::map destructor, isn't it?
> I think it is also true to ought to clear when a new stream
> configuration is requested from the following camera3 API statement,
>
> >  Add signal_stream_flush() to camera3_device_ops_t for camera service to notify HAL an
> >  upcoming configure_streams() call requires HAL to return buffers of certain streams.
> https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/camera3.h#194
> 
> > I'm wondering if we shouldn't go one step forward. I'll assume here that
> > this patch is dealing with failures to queue requests when calling
> > Camera::queueRequest(). If there are other issues, the comments below
> > may not apply to them.
> >
> > When queuing a request to libcamera, we call
> > CameraWorker::queueRequest(). It's a void function that invokes
> > Worker::processRequest() as a queued call, so it returns immediately,
> > and we can't know if Worker::processRequest() will fail or not. We have
> > thus sent the request onto a journey, hoping it will be successfully
> > queued. There can be multiple reasons that won't be the case:
> >
> > - CameraWorker::Worker::processRequest() failing to wait for a fence.
> >   Oops, shouldn't happen, but we still need to handle this gracefully.
> >   We're still in the HAL here, the request hasn't reached libcamera. The
> >   HAL should thus handle the failure. This is case number 1.
> >
> > - CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
> >   which calls Camera::queueRequest(). That function can return an error
> >   if some of the sanity checks fail. We've reached libcamera, but it has
> >   failed synchronously, so the HAL should handle the failure. Still case
> >   number 1.
> >
> > - At this point, the request is sent to PipelineHandler::queueRequest(),
> >   asynchronously again. That function doesn't perform any sanity check
> >   (but it could in the future), and queues the request to the pipeline
> >   handler with a call to PipelineHandler:queueRequestDevice(),
> >   synchronously. This call can fail. If the future sanity checks in
> >   PipelineHandler::queueRequest() or
> >   PipelineHandler:queueRequestDevice() fail, the request belongs to the
> >   libcamera core (the HAL has queued it successfully, and the pipeline
> >   handler hasn't accepted it). This is case number 2, and I believe this
> >   is the one that has triggered this patch.
> >
> > - The pipeline handler has accepted the request, but something fails
> >   later. That's case number 3.
> >
> > Now let's look at those three cases, in reverse order.
> >
> > 3. The request belongs to the pipeline handler, which must complete it.
> > We have support in the PipelineHandler base class to accept completion
> > of requests out of sequence, and reorder the completion events to the
> > HAL to guarantee they will be issued in sequence. The pipeline handler
> > can thus complete the request with an error state as soon as it detects
> > the error, and the HAL will eventually receive a requestComplete() call.
> > As far as I can tell, this is working correctly.
> >
> > 2. This is embarassing, we don't handle this at all.
> > PipelineHandler::queueRequest() returns an error code, but it never
> > reaches Camera::queueReuest() as the call to
> > PipelineHandler::queueRequest() is asynchronous. We should make
> > PipelineHandler::queueRequest() a void function to be less misleading,
> > and PipelineHandler::queueRequest() needs to signal request completion
> > in that case. I think we can reuse the same request completion ordering
> > mechanism and immediately mark the request as complete (with an error)
> > in PipelineHandler::queueRequest() in that case. It should be an easy
> > fix (famous last words... :-)).
> >
> > 1. This is the responsibility of the HAL, which should signal request
> > completion to the camera service. Depending on whether the camera
> > service can support out-of-order completion or not, we can signal
> > completion immediately, or need to keep the request in a queue and
> > complete it at the appropriate time.
> >
> > As far as I understand, this patch is meant to address the memory leak
> > related to case 2, and also addresses as a consequence the leak related
> > to case 1 (case 3 should be working correctly already, if I'm not
> > mistaken). For case 2 I think the correct fix should be to ensure that
> > libcamera will complete the request, as explained above. For case 1, we
> > need a specific fix in the HAL, not only for the memory leak, but to
> > also complete the request towards the camera service.
> >
> > Is this analysis correct, or am I missing something ?
> 
> Thanks for analyzing the details. Yes your analysis looks correct.
> Well, I expect this patch is to retain memory leakage in any cases.
> I mean that no memory is leaked due to any failures.
> 
> I agree that the failure 1 should be handled properly.
> The fix to failure 2 also sounds good to me.

Thanks for confirming. Do you plan to look into addressing issues 1
and/or 2 ?

> > > > +   if (node.empty()) {
> > > > +           LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > > > +           status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > +           return;
> > > > +   }
> > > > +   Camera3RequestDescriptor& descriptor = node.mapped();
> > > >
> > > >     if (request->status() != Request::RequestComplete) {
> > > >             LOG(HAL, Error) << "Request not successfully completed: "
> > > > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> > > >     }
> > > >
> > > >     LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > > > -                   << descriptor->buffers_.size() << " streams";
> > > > +                   << descriptor.buffers_.size() << " streams";
> > > >
> > > >     /*
> > > >      * \todo The timestamp used for the metadata is currently always taken
> > > > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> > > >      * pipeline handlers) timestamp in the Request itself.
> > > >      */
> > > >     uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > > > -   resultMetadata = getResultMetadata(*descriptor, timestamp);
> > > > +   resultMetadata = getResultMetadata(descriptor, timestamp);
> > > >
> > > >     /* Handle any JPEG compression. */
> > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > >             CameraStream *cameraStream =
> > > >                     static_cast<CameraStream *>(buffer.stream->priv);
> > > >
> > > > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> > > >
> > > >             int ret = cameraStream->process(*src,
> > > >                                             *buffer.buffer,
> > > > -                                           descriptor->settings_,
> > > > +                                           descriptor.settings_,
> > > >                                             resultMetadata.get());
> > > >             if (ret) {
> > > >                     status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> > > >
> > > >     /* Prepare to call back the Android camera stack. */
> > > >     camera3_capture_result_t captureResult = {};
> > > > -   captureResult.frame_number = descriptor->frameNumber_;
> > > > -   captureResult.num_output_buffers = descriptor->buffers_.size();
> > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > +   captureResult.frame_number = descriptor.frameNumber_;
> > > > +   captureResult.num_output_buffers = descriptor.buffers_.size();
> > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > >             buffer.acquire_fence = -1;
> > > >             buffer.release_fence = -1;
> > > >             buffer.status = status;
> > > >     }
> > > > -   captureResult.output_buffers = descriptor->buffers_.data();
> > > > +   captureResult.output_buffers = descriptor.buffers_.data();
> > > >
> > > >     if (status == CAMERA3_BUFFER_STATUS_OK) {
> > > > -           notifyShutter(descriptor->frameNumber_, timestamp);
> > > > +           notifyShutter(descriptor.frameNumber_, timestamp);
> > > >
> > > >             captureResult.partial_result = 1;
> > > >             captureResult.result = resultMetadata->get();
> > > > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> > > >              * is here signalled. Make sure the error path plays well with
> > > >              * the camera stack state machine.
> > > >              */
> > > > -           notifyError(descriptor->frameNumber_,
> > > > -                       descriptor->buffers_[0].stream);
> > > > +           notifyError(descriptor.frameNumber_,
> > > > +                       descriptor.buffers_[0].stream);
> > > >     }
> > > >
> > > >     callbacks_->process_capture_result(callbacks_, &captureResult);
> > > > -
> > > > -   delete descriptor;
> > > >  }
> > > >
> > > >  std::string CameraDevice::logPrefix() const
> > > > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > > > index 11bdfec8..54d7c319 100644
> > > > --- a/src/android/camera_device.h
> > > > +++ b/src/android/camera_device.h
> > > > @@ -69,9 +69,11 @@ private:
> > > >     CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> > > >
> > > >     struct Camera3RequestDescriptor {
> > > > +           Camera3RequestDescriptor();
> > > > +           ~Camera3RequestDescriptor();
> > > >             Camera3RequestDescriptor(libcamera::Camera *camera,
> > > >                                      const camera3_capture_request_t *camera3Request);
> > > > -           ~Camera3RequestDescriptor();
> > > > +           Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> > > >
> > > >             uint32_t frameNumber_;
> > > >             std::vector<camera3_stream_buffer_t> buffers_;
> > > > @@ -122,6 +124,8 @@ private:
> > > >     std::map<int, libcamera::PixelFormat> formatsMap_;
> > > >     std::vector<CameraStream> streams_;
> > > >
> > > > +   std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > > > +
> > > >     std::string maker_;
> > > >     std::string model_;
> > > >
> > > > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > > > index df436460..300ddde0 100644
> > > > --- a/src/android/camera_worker.cpp
> > > > +++ b/src/android/camera_worker.cpp
> > > > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> > > >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> > > >   * fences.
> > > >   */
> > > > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > > > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> > > >     : camera_(camera)
> > > >  {
> > > > -   request_ = camera_->createRequest(cookie);
> > > > +   request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> > > >  }
> > > >
> > > >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > > > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > > > index d7060576..64b1658b 100644
> > > > --- a/src/android/camera_worker.h
> > > > +++ b/src/android/camera_worker.h
> > > > @@ -22,7 +22,7 @@ class CameraDevice;
> > > >  class CaptureRequest
> > > >  {
> > > >  public:
> > > > -   CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > > > +   CaptureRequest(libcamera::Camera *camera);
> > > >
> > > >     const std::vector<int> &fences() const { return acquireFences_; }
> > > >     libcamera::ControlList &controls() { return request_->controls(); }
Hirokazu Honda March 28, 2021, 10:24 p.m. UTC | #5
Hi Laurent,

On Mon, Mar 29, 2021 at 7:17 AM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Hiro,
>
> On Mon, Mar 29, 2021 at 07:10:27AM +0900, Hirokazu Honda wrote:
> > Hi Jacopo and Laurent, thanks for reviewing.
> >
> > On Fri, Mar 26, 2021 at 2:22 AM Laurent Pinchartwrote:
> > >
> > > Hi Hiro,
> > >
> > > Thank you for the patch, it's nice to see the HAL being cleaned up :-)
> > >
> > > On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> > > > On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > > > > CameraDevice creates Camera3RequestDescriptor in
> > > > > processCaptureRequest() and disallocates in requestComplete().
> > > > > Camera3RequestDescriptor can never be destroyed if
> > > > > requestComplete() is never called. This avoid the memory
> > > > > leakage by storing them in map CamerarRequestDescriptor.
> > >
> > > s/CamerarRequestDescriptor/CameraRequestDescriptor/
> > >
> > > When does this happen ? You've reported issues with the IPU3 pipeline
> > > handler failing to queue a request due to fact that it has no internal
> > > queue of requests waiting for hardware resources to be available. Is
> > > that what you're addressing here, or is there something else ? The
> > > libcamera core should guarantee that requestComplete() gets called for
> > > all requests successfully queued, so if there are other leakages, I'd
> > > like to investigate them.
> >
> > Since a libcamera process is likely alive as long as a system runs, I
> > would like to avoid any potential leakage as much as possible.
>
> We agree on this :-)
>
> > Regardless of the issue I've reported, I consider it is good to
> > guarantee the descriptor is destroyed at the latest in CameraDevice
> > descriptor.
> > As you said, requestComplete() should be returned and I haven't found
> > any other issues of not invoking it than the issue I reported, but I
> > think it isn't theoretically ensured.
>
> My point was that, as this isn't supposed to happen in the first place,
> I want to make sure we investigate any occurrence of the issue you may
> have been able to trigger. There's one particular problem you've
> reported that can lead this this leakage, and that will be addressed on
> its own, and I was wondering if there were any other. You're confirming
> that you haven't found any other, so I'm happy :-)
>
> > > > Very nice, I'm not particularly proud of this part of the
> > > > implementation as it is quite hard to follow, this makes it easier
> > > > indeed
> > > >
> > > > One question below
> > > >
> > > > >
> > > > > Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> > > > > ---
> > > > >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> > > > >  src/android/camera_device.h   |  6 +++-
> > > > >  src/android/camera_worker.cpp |  4 +--
> > > > >  src/android/camera_worker.h   |  2 +-
> > > > >  4 files changed, 44 insertions(+), 35 deletions(-)
> > > > >
> > > > > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > > > > index ae693664..50b017a3 100644
> > > > > --- a/src/android/camera_device.cpp
> > > > > +++ b/src/android/camera_device.cpp
> > > > > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> > > > >
> > > > >     /*
> > > > >      * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > > > > -    * to the descriptor's one. Set the descriptor's address as the
> > > > > -    * request's cookie to retrieve it at completion time.
> > > > > +    * to the descriptor's one.
> > > > >      */
> > > > > -   request_ = std::make_unique<CaptureRequest>(camera,
> > > > > -                                               reinterpret_cast<uint64_t>(this));
> > > > > +   request_ = std::make_unique<CaptureRequest>(camera);
> > > > >  }
> > > > >
> > > > > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> > > > >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> > > > >
> > > > > +CameraDevice::Camera3RequestDescriptor&
> > > > > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > > > > +
> > > > >  /*
> > > > >   * \class CameraDevice
> > > > >   *
> > > > > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > >      * The descriptor and the associated memory reserved here are freed
> > > > >      * at request complete time.
> > > > >      */
> > > > > -   Camera3RequestDescriptor *descriptor =
> > > > > -           new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > > > > +   Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > > > > +
> > > > >     /*
> > > > >      * \todo The Android request model is incremental, settings passed in
> > > > >      * previous requests are to be effective until overridden explicitly in
> > > > > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > >     if (camera3Request->settings)
> > > > >             lastSettings_ = camera3Request->settings;
> > > > >     else
> > > > > -           descriptor->settings_ = lastSettings_;
> > > > > +           descriptor.settings_ = lastSettings_;
> > > > >
> > > > > -   LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > > > > -                   << " with " << descriptor->buffers_.size() << " streams";
> > > > > -   for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > > > > -           const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > > > > +   LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > > > > +                   << " with " << descriptor.buffers_.size() << " streams";
> > > > > +   for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > > > > +           const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> > > > >             camera3_stream *camera3Stream = camera3Buffer.stream;
> > > > >             CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> > > > >
> > > > > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > >                      * lifetime management only.
> > > > >                      */
> > > > >                     buffer = createFrameBuffer(*camera3Buffer.buffer);
> > > > > -                   descriptor->frameBuffers_.emplace_back(buffer);
> > > > > +                   descriptor.frameBuffers_.emplace_back(buffer);
> > > > >                     LOG(HAL, Debug) << ss.str() << " (direct)";
> > > > >                     break;
> > > > >
> > > > > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > >
> > > > >             if (!buffer) {
> > > > >                     LOG(HAL, Error) << "Failed to create buffer";
> > > > > -                   delete descriptor;
> > > > >                     return -ENOMEM;
> > > > >             }
> > > > >
> > > > > -           descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > > > > +           descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> > > > >                                             camera3Buffer.acquire_fence);
> > > > >     }
> > > > >
> > > > > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > >      * Translate controls from Android to libcamera and queue the request
> > > > >      * to the CameraWorker thread.
> > > > >      */
> > > > > -   int ret = processControls(descriptor);
> > > > > +   int ret = processControls(&descriptor);
> > > > >     if (ret)
> > > > >             return ret;
> > > > >
> > > > > -   worker_.queueRequest(descriptor->request_.get());
> > > > > +   worker_.queueRequest(descriptor.request_.get());
> > > > > +   descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> > > > >
> > > > >     return 0;
> > > > >  }
> > > > > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> > > > >     const Request::BufferMap &buffers = request->buffers();
> > > > >     camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> > > > >     std::unique_ptr<CameraMetadata> resultMetadata;
> > > > > -   Camera3RequestDescriptor *descriptor =
> > > > > -           reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > > > > +   auto node = descriptors_.extract(request->cookie());
> > > >
> > > > So once node goes out of scope the mapped descriptor gets deleted.
> > > > Very nice.
> > > >
> > > > However, shouldn't descriptors_ be cleared at stop() time or when a
> > > > new stream configuration is requested ?
> >
> > descriptors_ is cleared thanks to std::map destructor, isn't it?
> > I think it is also true to ought to clear when a new stream
> > configuration is requested from the following camera3 API statement,
> >
> > >  Add signal_stream_flush() to camera3_device_ops_t for camera service to notify HAL an
> > >  upcoming configure_streams() call requires HAL to return buffers of certain streams.
> > https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/camera3.h#194
> >
> > > I'm wondering if we shouldn't go one step forward. I'll assume here that
> > > this patch is dealing with failures to queue requests when calling
> > > Camera::queueRequest(). If there are other issues, the comments below
> > > may not apply to them.
> > >
> > > When queuing a request to libcamera, we call
> > > CameraWorker::queueRequest(). It's a void function that invokes
> > > Worker::processRequest() as a queued call, so it returns immediately,
> > > and we can't know if Worker::processRequest() will fail or not. We have
> > > thus sent the request onto a journey, hoping it will be successfully
> > > queued. There can be multiple reasons that won't be the case:
> > >
> > > - CameraWorker::Worker::processRequest() failing to wait for a fence.
> > >   Oops, shouldn't happen, but we still need to handle this gracefully.
> > >   We're still in the HAL here, the request hasn't reached libcamera. The
> > >   HAL should thus handle the failure. This is case number 1.
> > >
> > > - CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
> > >   which calls Camera::queueRequest(). That function can return an error
> > >   if some of the sanity checks fail. We've reached libcamera, but it has
> > >   failed synchronously, so the HAL should handle the failure. Still case
> > >   number 1.
> > >
> > > - At this point, the request is sent to PipelineHandler::queueRequest(),
> > >   asynchronously again. That function doesn't perform any sanity check
> > >   (but it could in the future), and queues the request to the pipeline
> > >   handler with a call to PipelineHandler:queueRequestDevice(),
> > >   synchronously. This call can fail. If the future sanity checks in
> > >   PipelineHandler::queueRequest() or
> > >   PipelineHandler:queueRequestDevice() fail, the request belongs to the
> > >   libcamera core (the HAL has queued it successfully, and the pipeline
> > >   handler hasn't accepted it). This is case number 2, and I believe this
> > >   is the one that has triggered this patch.
> > >
> > > - The pipeline handler has accepted the request, but something fails
> > >   later. That's case number 3.
> > >
> > > Now let's look at those three cases, in reverse order.
> > >
> > > 3. The request belongs to the pipeline handler, which must complete it.
> > > We have support in the PipelineHandler base class to accept completion
> > > of requests out of sequence, and reorder the completion events to the
> > > HAL to guarantee they will be issued in sequence. The pipeline handler
> > > can thus complete the request with an error state as soon as it detects
> > > the error, and the HAL will eventually receive a requestComplete() call.
> > > As far as I can tell, this is working correctly.
> > >
> > > 2. This is embarassing, we don't handle this at all.
> > > PipelineHandler::queueRequest() returns an error code, but it never
> > > reaches Camera::queueReuest() as the call to
> > > PipelineHandler::queueRequest() is asynchronous. We should make
> > > PipelineHandler::queueRequest() a void function to be less misleading,
> > > and PipelineHandler::queueRequest() needs to signal request completion
> > > in that case. I think we can reuse the same request completion ordering
> > > mechanism and immediately mark the request as complete (with an error)
> > > in PipelineHandler::queueRequest() in that case. It should be an easy
> > > fix (famous last words... :-)).
> > >
> > > 1. This is the responsibility of the HAL, which should signal request
> > > completion to the camera service. Depending on whether the camera
> > > service can support out-of-order completion or not, we can signal
> > > completion immediately, or need to keep the request in a queue and
> > > complete it at the appropriate time.
> > >
> > > As far as I understand, this patch is meant to address the memory leak
> > > related to case 2, and also addresses as a consequence the leak related
> > > to case 1 (case 3 should be working correctly already, if I'm not
> > > mistaken). For case 2 I think the correct fix should be to ensure that
> > > libcamera will complete the request, as explained above. For case 1, we
> > > need a specific fix in the HAL, not only for the memory leak, but to
> > > also complete the request towards the camera service.
> > >
> > > Is this analysis correct, or am I missing something ?
> >
> > Thanks for analyzing the details. Yes your analysis looks correct.
> > Well, I expect this patch is to retain memory leakage in any cases.
> > I mean that no memory is leaked due to any failures.
> >
> > I agree that the failure 1 should be handled properly.
> > The fix to failure 2 also sounds good to me.
>
> Thanks for confirming. Do you plan to look into addressing issues 1
> and/or 2 ?
>

Sure. I will do it. Can we review and check in this patch while I am doing?

-Hiro

> > > > > +   if (node.empty()) {
> > > > > +           LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > > > > +           status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > > +           return;
> > > > > +   }
> > > > > +   Camera3RequestDescriptor& descriptor = node.mapped();
> > > > >
> > > > >     if (request->status() != Request::RequestComplete) {
> > > > >             LOG(HAL, Error) << "Request not successfully completed: "
> > > > > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> > > > >     }
> > > > >
> > > > >     LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > > > > -                   << descriptor->buffers_.size() << " streams";
> > > > > +                   << descriptor.buffers_.size() << " streams";
> > > > >
> > > > >     /*
> > > > >      * \todo The timestamp used for the metadata is currently always taken
> > > > > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> > > > >      * pipeline handlers) timestamp in the Request itself.
> > > > >      */
> > > > >     uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > > > > -   resultMetadata = getResultMetadata(*descriptor, timestamp);
> > > > > +   resultMetadata = getResultMetadata(descriptor, timestamp);
> > > > >
> > > > >     /* Handle any JPEG compression. */
> > > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > > >             CameraStream *cameraStream =
> > > > >                     static_cast<CameraStream *>(buffer.stream->priv);
> > > > >
> > > > > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> > > > >
> > > > >             int ret = cameraStream->process(*src,
> > > > >                                             *buffer.buffer,
> > > > > -                                           descriptor->settings_,
> > > > > +                                           descriptor.settings_,
> > > > >                                             resultMetadata.get());
> > > > >             if (ret) {
> > > > >                     status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> > > > >
> > > > >     /* Prepare to call back the Android camera stack. */
> > > > >     camera3_capture_result_t captureResult = {};
> > > > > -   captureResult.frame_number = descriptor->frameNumber_;
> > > > > -   captureResult.num_output_buffers = descriptor->buffers_.size();
> > > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > > +   captureResult.frame_number = descriptor.frameNumber_;
> > > > > +   captureResult.num_output_buffers = descriptor.buffers_.size();
> > > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > > >             buffer.acquire_fence = -1;
> > > > >             buffer.release_fence = -1;
> > > > >             buffer.status = status;
> > > > >     }
> > > > > -   captureResult.output_buffers = descriptor->buffers_.data();
> > > > > +   captureResult.output_buffers = descriptor.buffers_.data();
> > > > >
> > > > >     if (status == CAMERA3_BUFFER_STATUS_OK) {
> > > > > -           notifyShutter(descriptor->frameNumber_, timestamp);
> > > > > +           notifyShutter(descriptor.frameNumber_, timestamp);
> > > > >
> > > > >             captureResult.partial_result = 1;
> > > > >             captureResult.result = resultMetadata->get();
> > > > > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> > > > >              * is here signalled. Make sure the error path plays well with
> > > > >              * the camera stack state machine.
> > > > >              */
> > > > > -           notifyError(descriptor->frameNumber_,
> > > > > -                       descriptor->buffers_[0].stream);
> > > > > +           notifyError(descriptor.frameNumber_,
> > > > > +                       descriptor.buffers_[0].stream);
> > > > >     }
> > > > >
> > > > >     callbacks_->process_capture_result(callbacks_, &captureResult);
> > > > > -
> > > > > -   delete descriptor;
> > > > >  }
> > > > >
> > > > >  std::string CameraDevice::logPrefix() const
> > > > > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > > > > index 11bdfec8..54d7c319 100644
> > > > > --- a/src/android/camera_device.h
> > > > > +++ b/src/android/camera_device.h
> > > > > @@ -69,9 +69,11 @@ private:
> > > > >     CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> > > > >
> > > > >     struct Camera3RequestDescriptor {
> > > > > +           Camera3RequestDescriptor();
> > > > > +           ~Camera3RequestDescriptor();
> > > > >             Camera3RequestDescriptor(libcamera::Camera *camera,
> > > > >                                      const camera3_capture_request_t *camera3Request);
> > > > > -           ~Camera3RequestDescriptor();
> > > > > +           Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> > > > >
> > > > >             uint32_t frameNumber_;
> > > > >             std::vector<camera3_stream_buffer_t> buffers_;
> > > > > @@ -122,6 +124,8 @@ private:
> > > > >     std::map<int, libcamera::PixelFormat> formatsMap_;
> > > > >     std::vector<CameraStream> streams_;
> > > > >
> > > > > +   std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > > > > +
> > > > >     std::string maker_;
> > > > >     std::string model_;
> > > > >
> > > > > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > > > > index df436460..300ddde0 100644
> > > > > --- a/src/android/camera_worker.cpp
> > > > > +++ b/src/android/camera_worker.cpp
> > > > > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> > > > >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> > > > >   * fences.
> > > > >   */
> > > > > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > > > > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> > > > >     : camera_(camera)
> > > > >  {
> > > > > -   request_ = camera_->createRequest(cookie);
> > > > > +   request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> > > > >  }
> > > > >
> > > > >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > > > > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > > > > index d7060576..64b1658b 100644
> > > > > --- a/src/android/camera_worker.h
> > > > > +++ b/src/android/camera_worker.h
> > > > > @@ -22,7 +22,7 @@ class CameraDevice;
> > > > >  class CaptureRequest
> > > > >  {
> > > > >  public:
> > > > > -   CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > > > > +   CaptureRequest(libcamera::Camera *camera);
> > > > >
> > > > >     const std::vector<int> &fences() const { return acquireFences_; }
> > > > >     libcamera::ControlList &controls() { return request_->controls(); }
>
> --
> Regards,
>
> Laurent Pinchart
Laurent Pinchart March 29, 2021, 5:35 a.m. UTC | #6
Hi Hiro,

On Mon, Mar 29, 2021 at 07:24:02AM +0900, Hirokazu Honda wrote:
> On Mon, Mar 29, 2021 at 7:17 AM Laurent Pinchart wrote:
> > On Mon, Mar 29, 2021 at 07:10:27AM +0900, Hirokazu Honda wrote:
> > > On Fri, Mar 26, 2021 at 2:22 AM Laurent Pinchartwrote:
> > > > On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> > > > > On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > > > > > CameraDevice creates Camera3RequestDescriptor in
> > > > > > processCaptureRequest() and disallocates in requestComplete().
> > > > > > Camera3RequestDescriptor can never be destroyed if
> > > > > > requestComplete() is never called. This avoid the memory
> > > > > > leakage by storing them in map CamerarRequestDescriptor.
> > > >
> > > > s/CamerarRequestDescriptor/CameraRequestDescriptor/
> > > >
> > > > When does this happen ? You've reported issues with the IPU3 pipeline
> > > > handler failing to queue a request due to fact that it has no internal
> > > > queue of requests waiting for hardware resources to be available. Is
> > > > that what you're addressing here, or is there something else ? The
> > > > libcamera core should guarantee that requestComplete() gets called for
> > > > all requests successfully queued, so if there are other leakages, I'd
> > > > like to investigate them.
> > >
> > > Since a libcamera process is likely alive as long as a system runs, I
> > > would like to avoid any potential leakage as much as possible.
> >
> > We agree on this :-)
> >
> > > Regardless of the issue I've reported, I consider it is good to
> > > guarantee the descriptor is destroyed at the latest in CameraDevice
> > > descriptor.
> > > As you said, requestComplete() should be returned and I haven't found
> > > any other issues of not invoking it than the issue I reported, but I
> > > think it isn't theoretically ensured.
> >
> > My point was that, as this isn't supposed to happen in the first place,
> > I want to make sure we investigate any occurrence of the issue you may
> > have been able to trigger. There's one particular problem you've
> > reported that can lead this this leakage, and that will be addressed on
> > its own, and I was wondering if there were any other. You're confirming
> > that you haven't found any other, so I'm happy :-)
> >
> > > > > Very nice, I'm not particularly proud of this part of the
> > > > > implementation as it is quite hard to follow, this makes it easier
> > > > > indeed
> > > > >
> > > > > One question below
> > > > >
> > > > > >
> > > > > > Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> > > > > > ---
> > > > > >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> > > > > >  src/android/camera_device.h   |  6 +++-
> > > > > >  src/android/camera_worker.cpp |  4 +--
> > > > > >  src/android/camera_worker.h   |  2 +-
> > > > > >  4 files changed, 44 insertions(+), 35 deletions(-)
> > > > > >
> > > > > > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > > > > > index ae693664..50b017a3 100644
> > > > > > --- a/src/android/camera_device.cpp
> > > > > > +++ b/src/android/camera_device.cpp
> > > > > > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> > > > > >
> > > > > >     /*
> > > > > >      * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > > > > > -    * to the descriptor's one. Set the descriptor's address as the
> > > > > > -    * request's cookie to retrieve it at completion time.
> > > > > > +    * to the descriptor's one.
> > > > > >      */
> > > > > > -   request_ = std::make_unique<CaptureRequest>(camera,
> > > > > > -                                               reinterpret_cast<uint64_t>(this));
> > > > > > +   request_ = std::make_unique<CaptureRequest>(camera);
> > > > > >  }
> > > > > >
> > > > > > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> > > > > >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> > > > > >
> > > > > > +CameraDevice::Camera3RequestDescriptor&
> > > > > > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > > > > > +
> > > > > >  /*
> > > > > >   * \class CameraDevice
> > > > > >   *
> > > > > > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > >      * The descriptor and the associated memory reserved here are freed
> > > > > >      * at request complete time.
> > > > > >      */
> > > > > > -   Camera3RequestDescriptor *descriptor =
> > > > > > -           new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > > > > > +   Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > > > > > +
> > > > > >     /*
> > > > > >      * \todo The Android request model is incremental, settings passed in
> > > > > >      * previous requests are to be effective until overridden explicitly in
> > > > > > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > >     if (camera3Request->settings)
> > > > > >             lastSettings_ = camera3Request->settings;
> > > > > >     else
> > > > > > -           descriptor->settings_ = lastSettings_;
> > > > > > +           descriptor.settings_ = lastSettings_;
> > > > > >
> > > > > > -   LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > > > > > -                   << " with " << descriptor->buffers_.size() << " streams";
> > > > > > -   for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > > > > > -           const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > > > > > +   LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > > > > > +                   << " with " << descriptor.buffers_.size() << " streams";
> > > > > > +   for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > > > > > +           const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> > > > > >             camera3_stream *camera3Stream = camera3Buffer.stream;
> > > > > >             CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> > > > > >
> > > > > > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > >                      * lifetime management only.
> > > > > >                      */
> > > > > >                     buffer = createFrameBuffer(*camera3Buffer.buffer);
> > > > > > -                   descriptor->frameBuffers_.emplace_back(buffer);
> > > > > > +                   descriptor.frameBuffers_.emplace_back(buffer);
> > > > > >                     LOG(HAL, Debug) << ss.str() << " (direct)";
> > > > > >                     break;
> > > > > >
> > > > > > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > >
> > > > > >             if (!buffer) {
> > > > > >                     LOG(HAL, Error) << "Failed to create buffer";
> > > > > > -                   delete descriptor;
> > > > > >                     return -ENOMEM;
> > > > > >             }
> > > > > >
> > > > > > -           descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > > > > > +           descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> > > > > >                                             camera3Buffer.acquire_fence);
> > > > > >     }
> > > > > >
> > > > > > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > >      * Translate controls from Android to libcamera and queue the request
> > > > > >      * to the CameraWorker thread.
> > > > > >      */
> > > > > > -   int ret = processControls(descriptor);
> > > > > > +   int ret = processControls(&descriptor);
> > > > > >     if (ret)
> > > > > >             return ret;
> > > > > >
> > > > > > -   worker_.queueRequest(descriptor->request_.get());
> > > > > > +   worker_.queueRequest(descriptor.request_.get());
> > > > > > +   descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> > > > > >
> > > > > >     return 0;
> > > > > >  }
> > > > > > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> > > > > >     const Request::BufferMap &buffers = request->buffers();
> > > > > >     camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> > > > > >     std::unique_ptr<CameraMetadata> resultMetadata;
> > > > > > -   Camera3RequestDescriptor *descriptor =
> > > > > > -           reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > > > > > +   auto node = descriptors_.extract(request->cookie());
> > > > >
> > > > > So once node goes out of scope the mapped descriptor gets deleted.
> > > > > Very nice.
> > > > >
> > > > > However, shouldn't descriptors_ be cleared at stop() time or when a
> > > > > new stream configuration is requested ?
> > >
> > > descriptors_ is cleared thanks to std::map destructor, isn't it?
> > > I think it is also true to ought to clear when a new stream
> > > configuration is requested from the following camera3 API statement,
> > >
> > > >  Add signal_stream_flush() to camera3_device_ops_t for camera service to notify HAL an
> > > >  upcoming configure_streams() call requires HAL to return buffers of certain streams.
> > > https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/camera3.h#194
> > >
> > > > I'm wondering if we shouldn't go one step forward. I'll assume here that
> > > > this patch is dealing with failures to queue requests when calling
> > > > Camera::queueRequest(). If there are other issues, the comments below
> > > > may not apply to them.
> > > >
> > > > When queuing a request to libcamera, we call
> > > > CameraWorker::queueRequest(). It's a void function that invokes
> > > > Worker::processRequest() as a queued call, so it returns immediately,
> > > > and we can't know if Worker::processRequest() will fail or not. We have
> > > > thus sent the request onto a journey, hoping it will be successfully
> > > > queued. There can be multiple reasons that won't be the case:
> > > >
> > > > - CameraWorker::Worker::processRequest() failing to wait for a fence.
> > > >   Oops, shouldn't happen, but we still need to handle this gracefully.
> > > >   We're still in the HAL here, the request hasn't reached libcamera. The
> > > >   HAL should thus handle the failure. This is case number 1.
> > > >
> > > > - CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
> > > >   which calls Camera::queueRequest(). That function can return an error
> > > >   if some of the sanity checks fail. We've reached libcamera, but it has
> > > >   failed synchronously, so the HAL should handle the failure. Still case
> > > >   number 1.
> > > >
> > > > - At this point, the request is sent to PipelineHandler::queueRequest(),
> > > >   asynchronously again. That function doesn't perform any sanity check
> > > >   (but it could in the future), and queues the request to the pipeline
> > > >   handler with a call to PipelineHandler:queueRequestDevice(),
> > > >   synchronously. This call can fail. If the future sanity checks in
> > > >   PipelineHandler::queueRequest() or
> > > >   PipelineHandler:queueRequestDevice() fail, the request belongs to the
> > > >   libcamera core (the HAL has queued it successfully, and the pipeline
> > > >   handler hasn't accepted it). This is case number 2, and I believe this
> > > >   is the one that has triggered this patch.
> > > >
> > > > - The pipeline handler has accepted the request, but something fails
> > > >   later. That's case number 3.
> > > >
> > > > Now let's look at those three cases, in reverse order.
> > > >
> > > > 3. The request belongs to the pipeline handler, which must complete it.
> > > > We have support in the PipelineHandler base class to accept completion
> > > > of requests out of sequence, and reorder the completion events to the
> > > > HAL to guarantee they will be issued in sequence. The pipeline handler
> > > > can thus complete the request with an error state as soon as it detects
> > > > the error, and the HAL will eventually receive a requestComplete() call.
> > > > As far as I can tell, this is working correctly.
> > > >
> > > > 2. This is embarassing, we don't handle this at all.
> > > > PipelineHandler::queueRequest() returns an error code, but it never
> > > > reaches Camera::queueReuest() as the call to
> > > > PipelineHandler::queueRequest() is asynchronous. We should make
> > > > PipelineHandler::queueRequest() a void function to be less misleading,
> > > > and PipelineHandler::queueRequest() needs to signal request completion
> > > > in that case. I think we can reuse the same request completion ordering
> > > > mechanism and immediately mark the request as complete (with an error)
> > > > in PipelineHandler::queueRequest() in that case. It should be an easy
> > > > fix (famous last words... :-)).
> > > >
> > > > 1. This is the responsibility of the HAL, which should signal request
> > > > completion to the camera service. Depending on whether the camera
> > > > service can support out-of-order completion or not, we can signal
> > > > completion immediately, or need to keep the request in a queue and
> > > > complete it at the appropriate time.
> > > >
> > > > As far as I understand, this patch is meant to address the memory leak
> > > > related to case 2, and also addresses as a consequence the leak related
> > > > to case 1 (case 3 should be working correctly already, if I'm not
> > > > mistaken). For case 2 I think the correct fix should be to ensure that
> > > > libcamera will complete the request, as explained above. For case 1, we
> > > > need a specific fix in the HAL, not only for the memory leak, but to
> > > > also complete the request towards the camera service.
> > > >
> > > > Is this analysis correct, or am I missing something ?
> > >
> > > Thanks for analyzing the details. Yes your analysis looks correct.
> > > Well, I expect this patch is to retain memory leakage in any cases.
> > > I mean that no memory is leaked due to any failures.
> > >
> > > I agree that the failure 1 should be handled properly.
> > > The fix to failure 2 also sounds good to me.
> >
> > Thanks for confirming. Do you plan to look into addressing issues 1
> > and/or 2 ?
> 
> Sure. I will do it. Can we review and check in this patch while I am doing?

Overall I think the patch is OK, but could we clear the descriptors_ map
when Camera::stop() is called in CameraDevice::configureStreams() ? I
would create a new private CameraDevice::stop() function:

void CameraDevice::stop()
{
	worker_.stop();
	camera_->stop();

	descriptors_.clear();
	running_ = false;
}

and call it from both CameraDevice::close() and
CameraDevice::configureStreams().

> > > > > > +   if (node.empty()) {
> > > > > > +           LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > > > > > +           status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > > > +           return;
> > > > > > +   }
> > > > > > +   Camera3RequestDescriptor& descriptor = node.mapped();
> > > > > >
> > > > > >     if (request->status() != Request::RequestComplete) {
> > > > > >             LOG(HAL, Error) << "Request not successfully completed: "
> > > > > > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> > > > > >     }
> > > > > >
> > > > > >     LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > > > > > -                   << descriptor->buffers_.size() << " streams";
> > > > > > +                   << descriptor.buffers_.size() << " streams";
> > > > > >
> > > > > >     /*
> > > > > >      * \todo The timestamp used for the metadata is currently always taken
> > > > > > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> > > > > >      * pipeline handlers) timestamp in the Request itself.
> > > > > >      */
> > > > > >     uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > > > > > -   resultMetadata = getResultMetadata(*descriptor, timestamp);
> > > > > > +   resultMetadata = getResultMetadata(descriptor, timestamp);
> > > > > >
> > > > > >     /* Handle any JPEG compression. */
> > > > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > > > >             CameraStream *cameraStream =
> > > > > >                     static_cast<CameraStream *>(buffer.stream->priv);
> > > > > >
> > > > > > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> > > > > >
> > > > > >             int ret = cameraStream->process(*src,
> > > > > >                                             *buffer.buffer,
> > > > > > -                                           descriptor->settings_,
> > > > > > +                                           descriptor.settings_,
> > > > > >                                             resultMetadata.get());
> > > > > >             if (ret) {
> > > > > >                     status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > > > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> > > > > >
> > > > > >     /* Prepare to call back the Android camera stack. */
> > > > > >     camera3_capture_result_t captureResult = {};
> > > > > > -   captureResult.frame_number = descriptor->frameNumber_;
> > > > > > -   captureResult.num_output_buffers = descriptor->buffers_.size();
> > > > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > > > +   captureResult.frame_number = descriptor.frameNumber_;
> > > > > > +   captureResult.num_output_buffers = descriptor.buffers_.size();
> > > > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > > > >             buffer.acquire_fence = -1;
> > > > > >             buffer.release_fence = -1;
> > > > > >             buffer.status = status;
> > > > > >     }
> > > > > > -   captureResult.output_buffers = descriptor->buffers_.data();
> > > > > > +   captureResult.output_buffers = descriptor.buffers_.data();
> > > > > >
> > > > > >     if (status == CAMERA3_BUFFER_STATUS_OK) {
> > > > > > -           notifyShutter(descriptor->frameNumber_, timestamp);
> > > > > > +           notifyShutter(descriptor.frameNumber_, timestamp);
> > > > > >
> > > > > >             captureResult.partial_result = 1;
> > > > > >             captureResult.result = resultMetadata->get();
> > > > > > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> > > > > >              * is here signalled. Make sure the error path plays well with
> > > > > >              * the camera stack state machine.
> > > > > >              */
> > > > > > -           notifyError(descriptor->frameNumber_,
> > > > > > -                       descriptor->buffers_[0].stream);
> > > > > > +           notifyError(descriptor.frameNumber_,
> > > > > > +                       descriptor.buffers_[0].stream);
> > > > > >     }
> > > > > >
> > > > > >     callbacks_->process_capture_result(callbacks_, &captureResult);
> > > > > > -
> > > > > > -   delete descriptor;
> > > > > >  }
> > > > > >
> > > > > >  std::string CameraDevice::logPrefix() const
> > > > > > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > > > > > index 11bdfec8..54d7c319 100644
> > > > > > --- a/src/android/camera_device.h
> > > > > > +++ b/src/android/camera_device.h
> > > > > > @@ -69,9 +69,11 @@ private:
> > > > > >     CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> > > > > >
> > > > > >     struct Camera3RequestDescriptor {
> > > > > > +           Camera3RequestDescriptor();
> > > > > > +           ~Camera3RequestDescriptor();
> > > > > >             Camera3RequestDescriptor(libcamera::Camera *camera,
> > > > > >                                      const camera3_capture_request_t *camera3Request);
> > > > > > -           ~Camera3RequestDescriptor();
> > > > > > +           Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> > > > > >
> > > > > >             uint32_t frameNumber_;
> > > > > >             std::vector<camera3_stream_buffer_t> buffers_;
> > > > > > @@ -122,6 +124,8 @@ private:
> > > > > >     std::map<int, libcamera::PixelFormat> formatsMap_;
> > > > > >     std::vector<CameraStream> streams_;
> > > > > >
> > > > > > +   std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > > > > > +
> > > > > >     std::string maker_;
> > > > > >     std::string model_;
> > > > > >
> > > > > > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > > > > > index df436460..300ddde0 100644
> > > > > > --- a/src/android/camera_worker.cpp
> > > > > > +++ b/src/android/camera_worker.cpp
> > > > > > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> > > > > >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> > > > > >   * fences.
> > > > > >   */
> > > > > > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > > > > > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> > > > > >     : camera_(camera)
> > > > > >  {
> > > > > > -   request_ = camera_->createRequest(cookie);
> > > > > > +   request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> > > > > >  }
> > > > > >
> > > > > >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > > > > > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > > > > > index d7060576..64b1658b 100644
> > > > > > --- a/src/android/camera_worker.h
> > > > > > +++ b/src/android/camera_worker.h
> > > > > > @@ -22,7 +22,7 @@ class CameraDevice;
> > > > > >  class CaptureRequest
> > > > > >  {
> > > > > >  public:
> > > > > > -   CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > > > > > +   CaptureRequest(libcamera::Camera *camera);
> > > > > >
> > > > > >     const std::vector<int> &fences() const { return acquireFences_; }
> > > > > >     libcamera::ControlList &controls() { return request_->controls(); }
Hirokazu Honda March 29, 2021, 6:12 a.m. UTC | #7
Hi Laurent,

On Mon, Mar 29, 2021 at 2:36 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Hiro,
>
> On Mon, Mar 29, 2021 at 07:24:02AM +0900, Hirokazu Honda wrote:
> > On Mon, Mar 29, 2021 at 7:17 AM Laurent Pinchart wrote:
> > > On Mon, Mar 29, 2021 at 07:10:27AM +0900, Hirokazu Honda wrote:
> > > > On Fri, Mar 26, 2021 at 2:22 AM Laurent Pinchartwrote:
> > > > > On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> > > > > > On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > > > > > > CameraDevice creates Camera3RequestDescriptor in
> > > > > > > processCaptureRequest() and disallocates in requestComplete().
> > > > > > > Camera3RequestDescriptor can never be destroyed if
> > > > > > > requestComplete() is never called. This avoid the memory
> > > > > > > leakage by storing them in map CamerarRequestDescriptor.
> > > > >
> > > > > s/CamerarRequestDescriptor/CameraRequestDescriptor/
> > > > >
> > > > > When does this happen ? You've reported issues with the IPU3 pipeline
> > > > > handler failing to queue a request due to fact that it has no internal
> > > > > queue of requests waiting for hardware resources to be available. Is
> > > > > that what you're addressing here, or is there something else ? The
> > > > > libcamera core should guarantee that requestComplete() gets called for
> > > > > all requests successfully queued, so if there are other leakages, I'd
> > > > > like to investigate them.
> > > >
> > > > Since a libcamera process is likely alive as long as a system runs, I
> > > > would like to avoid any potential leakage as much as possible.
> > >
> > > We agree on this :-)
> > >
> > > > Regardless of the issue I've reported, I consider it is good to
> > > > guarantee the descriptor is destroyed at the latest in CameraDevice
> > > > descriptor.
> > > > As you said, requestComplete() should be returned and I haven't found
> > > > any other issues of not invoking it than the issue I reported, but I
> > > > think it isn't theoretically ensured.
> > >
> > > My point was that, as this isn't supposed to happen in the first place,
> > > I want to make sure we investigate any occurrence of the issue you may
> > > have been able to trigger. There's one particular problem you've
> > > reported that can lead this this leakage, and that will be addressed on
> > > its own, and I was wondering if there were any other. You're confirming
> > > that you haven't found any other, so I'm happy :-)
> > >
> > > > > > Very nice, I'm not particularly proud of this part of the
> > > > > > implementation as it is quite hard to follow, this makes it easier
> > > > > > indeed
> > > > > >
> > > > > > One question below
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Hirokazu Honda <hiroh@chromium.org>
> > > > > > > ---
> > > > > > >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> > > > > > >  src/android/camera_device.h   |  6 +++-
> > > > > > >  src/android/camera_worker.cpp |  4 +--
> > > > > > >  src/android/camera_worker.h   |  2 +-
> > > > > > >  4 files changed, 44 insertions(+), 35 deletions(-)
> > > > > > >
> > > > > > > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > > > > > > index ae693664..50b017a3 100644
> > > > > > > --- a/src/android/camera_device.cpp
> > > > > > > +++ b/src/android/camera_device.cpp
> > > > > > > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> > > > > > >
> > > > > > >     /*
> > > > > > >      * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > > > > > > -    * to the descriptor's one. Set the descriptor's address as the
> > > > > > > -    * request's cookie to retrieve it at completion time.
> > > > > > > +    * to the descriptor's one.
> > > > > > >      */
> > > > > > > -   request_ = std::make_unique<CaptureRequest>(camera,
> > > > > > > -                                               reinterpret_cast<uint64_t>(this));
> > > > > > > +   request_ = std::make_unique<CaptureRequest>(camera);
> > > > > > >  }
> > > > > > >
> > > > > > > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> > > > > > >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> > > > > > >
> > > > > > > +CameraDevice::Camera3RequestDescriptor&
> > > > > > > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > > > > > > +
> > > > > > >  /*
> > > > > > >   * \class CameraDevice
> > > > > > >   *
> > > > > > > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > > >      * The descriptor and the associated memory reserved here are freed
> > > > > > >      * at request complete time.
> > > > > > >      */
> > > > > > > -   Camera3RequestDescriptor *descriptor =
> > > > > > > -           new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > > > > > > +   Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > > > > > > +
> > > > > > >     /*
> > > > > > >      * \todo The Android request model is incremental, settings passed in
> > > > > > >      * previous requests are to be effective until overridden explicitly in
> > > > > > > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > > >     if (camera3Request->settings)
> > > > > > >             lastSettings_ = camera3Request->settings;
> > > > > > >     else
> > > > > > > -           descriptor->settings_ = lastSettings_;
> > > > > > > +           descriptor.settings_ = lastSettings_;
> > > > > > >
> > > > > > > -   LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > > > > > > -                   << " with " << descriptor->buffers_.size() << " streams";
> > > > > > > -   for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > > > > > > -           const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > > > > > > +   LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > > > > > > +                   << " with " << descriptor.buffers_.size() << " streams";
> > > > > > > +   for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > > > > > > +           const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> > > > > > >             camera3_stream *camera3Stream = camera3Buffer.stream;
> > > > > > >             CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> > > > > > >
> > > > > > > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > > >                      * lifetime management only.
> > > > > > >                      */
> > > > > > >                     buffer = createFrameBuffer(*camera3Buffer.buffer);
> > > > > > > -                   descriptor->frameBuffers_.emplace_back(buffer);
> > > > > > > +                   descriptor.frameBuffers_.emplace_back(buffer);
> > > > > > >                     LOG(HAL, Debug) << ss.str() << " (direct)";
> > > > > > >                     break;
> > > > > > >
> > > > > > > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > > >
> > > > > > >             if (!buffer) {
> > > > > > >                     LOG(HAL, Error) << "Failed to create buffer";
> > > > > > > -                   delete descriptor;
> > > > > > >                     return -ENOMEM;
> > > > > > >             }
> > > > > > >
> > > > > > > -           descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > > > > > > +           descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> > > > > > >                                             camera3Buffer.acquire_fence);
> > > > > > >     }
> > > > > > >
> > > > > > > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> > > > > > >      * Translate controls from Android to libcamera and queue the request
> > > > > > >      * to the CameraWorker thread.
> > > > > > >      */
> > > > > > > -   int ret = processControls(descriptor);
> > > > > > > +   int ret = processControls(&descriptor);
> > > > > > >     if (ret)
> > > > > > >             return ret;
> > > > > > >
> > > > > > > -   worker_.queueRequest(descriptor->request_.get());
> > > > > > > +   worker_.queueRequest(descriptor.request_.get());
> > > > > > > +   descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> > > > > > >
> > > > > > >     return 0;
> > > > > > >  }
> > > > > > > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> > > > > > >     const Request::BufferMap &buffers = request->buffers();
> > > > > > >     camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> > > > > > >     std::unique_ptr<CameraMetadata> resultMetadata;
> > > > > > > -   Camera3RequestDescriptor *descriptor =
> > > > > > > -           reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > > > > > > +   auto node = descriptors_.extract(request->cookie());
> > > > > >
> > > > > > So once node goes out of scope the mapped descriptor gets deleted.
> > > > > > Very nice.
> > > > > >
> > > > > > However, shouldn't descriptors_ be cleared at stop() time or when a
> > > > > > new stream configuration is requested ?
> > > >
> > > > descriptors_ is cleared thanks to std::map destructor, isn't it?
> > > > I think it is also true to ought to clear when a new stream
> > > > configuration is requested from the following camera3 API statement,
> > > >
> > > > >  Add signal_stream_flush() to camera3_device_ops_t for camera service to notify HAL an
> > > > >  upcoming configure_streams() call requires HAL to return buffers of certain streams.
> > > > https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/camera3.h#194
> > > >
> > > > > I'm wondering if we shouldn't go one step forward. I'll assume here that
> > > > > this patch is dealing with failures to queue requests when calling
> > > > > Camera::queueRequest(). If there are other issues, the comments below
> > > > > may not apply to them.
> > > > >
> > > > > When queuing a request to libcamera, we call
> > > > > CameraWorker::queueRequest(). It's a void function that invokes
> > > > > Worker::processRequest() as a queued call, so it returns immediately,
> > > > > and we can't know if Worker::processRequest() will fail or not. We have
> > > > > thus sent the request onto a journey, hoping it will be successfully
> > > > > queued. There can be multiple reasons that won't be the case:
> > > > >
> > > > > - CameraWorker::Worker::processRequest() failing to wait for a fence.
> > > > >   Oops, shouldn't happen, but we still need to handle this gracefully.
> > > > >   We're still in the HAL here, the request hasn't reached libcamera. The
> > > > >   HAL should thus handle the failure. This is case number 1.
> > > > >
> > > > > - CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
> > > > >   which calls Camera::queueRequest(). That function can return an error
> > > > >   if some of the sanity checks fail. We've reached libcamera, but it has
> > > > >   failed synchronously, so the HAL should handle the failure. Still case
> > > > >   number 1.
> > > > >
> > > > > - At this point, the request is sent to PipelineHandler::queueRequest(),
> > > > >   asynchronously again. That function doesn't perform any sanity check
> > > > >   (but it could in the future), and queues the request to the pipeline
> > > > >   handler with a call to PipelineHandler:queueRequestDevice(),
> > > > >   synchronously. This call can fail. If the future sanity checks in
> > > > >   PipelineHandler::queueRequest() or
> > > > >   PipelineHandler:queueRequestDevice() fail, the request belongs to the
> > > > >   libcamera core (the HAL has queued it successfully, and the pipeline
> > > > >   handler hasn't accepted it). This is case number 2, and I believe this
> > > > >   is the one that has triggered this patch.
> > > > >
> > > > > - The pipeline handler has accepted the request, but something fails
> > > > >   later. That's case number 3.
> > > > >
> > > > > Now let's look at those three cases, in reverse order.
> > > > >
> > > > > 3. The request belongs to the pipeline handler, which must complete it.
> > > > > We have support in the PipelineHandler base class to accept completion
> > > > > of requests out of sequence, and reorder the completion events to the
> > > > > HAL to guarantee they will be issued in sequence. The pipeline handler
> > > > > can thus complete the request with an error state as soon as it detects
> > > > > the error, and the HAL will eventually receive a requestComplete() call.
> > > > > As far as I can tell, this is working correctly.
> > > > >
> > > > > 2. This is embarassing, we don't handle this at all.
> > > > > PipelineHandler::queueRequest() returns an error code, but it never
> > > > > reaches Camera::queueReuest() as the call to
> > > > > PipelineHandler::queueRequest() is asynchronous. We should make
> > > > > PipelineHandler::queueRequest() a void function to be less misleading,
> > > > > and PipelineHandler::queueRequest() needs to signal request completion
> > > > > in that case. I think we can reuse the same request completion ordering
> > > > > mechanism and immediately mark the request as complete (with an error)
> > > > > in PipelineHandler::queueRequest() in that case. It should be an easy
> > > > > fix (famous last words... :-)).
> > > > >
> > > > > 1. This is the responsibility of the HAL, which should signal request
> > > > > completion to the camera service. Depending on whether the camera
> > > > > service can support out-of-order completion or not, we can signal
> > > > > completion immediately, or need to keep the request in a queue and
> > > > > complete it at the appropriate time.
> > > > >
> > > > > As far as I understand, this patch is meant to address the memory leak
> > > > > related to case 2, and also addresses as a consequence the leak related
> > > > > to case 1 (case 3 should be working correctly already, if I'm not
> > > > > mistaken). For case 2 I think the correct fix should be to ensure that
> > > > > libcamera will complete the request, as explained above. For case 1, we
> > > > > need a specific fix in the HAL, not only for the memory leak, but to
> > > > > also complete the request towards the camera service.
> > > > >
> > > > > Is this analysis correct, or am I missing something ?
> > > >
> > > > Thanks for analyzing the details. Yes your analysis looks correct.
> > > > Well, I expect this patch is to retain memory leakage in any cases.
> > > > I mean that no memory is leaked due to any failures.
> > > >
> > > > I agree that the failure 1 should be handled properly.
> > > > The fix to failure 2 also sounds good to me.
> > >
> > > Thanks for confirming. Do you plan to look into addressing issues 1
> > > and/or 2 ?
> >
> > Sure. I will do it. Can we review and check in this patch while I am doing?
>
> Overall I think the patch is OK, but could we clear the descriptors_ map
> when Camera::stop() is called in CameraDevice::configureStreams() ? I
> would create a new private CameraDevice::stop() function:
>
> void CameraDevice::stop()
> {
>         worker_.stop();
>         camera_->stop();
>
>         descriptors_.clear();
>         running_ = false;
> }
>
> and call it from both CameraDevice::close() and
> CameraDevice::configureStreams().
>

Sounds good to me,
-Hiro
> > > > > > > +   if (node.empty()) {
> > > > > > > +           LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > > > > > > +           status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > > > > +           return;
> > > > > > > +   }
> > > > > > > +   Camera3RequestDescriptor& descriptor = node.mapped();
> > > > > > >
> > > > > > >     if (request->status() != Request::RequestComplete) {
> > > > > > >             LOG(HAL, Error) << "Request not successfully completed: "
> > > > > > > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> > > > > > >     }
> > > > > > >
> > > > > > >     LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > > > > > > -                   << descriptor->buffers_.size() << " streams";
> > > > > > > +                   << descriptor.buffers_.size() << " streams";
> > > > > > >
> > > > > > >     /*
> > > > > > >      * \todo The timestamp used for the metadata is currently always taken
> > > > > > > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> > > > > > >      * pipeline handlers) timestamp in the Request itself.
> > > > > > >      */
> > > > > > >     uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > > > > > > -   resultMetadata = getResultMetadata(*descriptor, timestamp);
> > > > > > > +   resultMetadata = getResultMetadata(descriptor, timestamp);
> > > > > > >
> > > > > > >     /* Handle any JPEG compression. */
> > > > > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > > > > >             CameraStream *cameraStream =
> > > > > > >                     static_cast<CameraStream *>(buffer.stream->priv);
> > > > > > >
> > > > > > > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> > > > > > >
> > > > > > >             int ret = cameraStream->process(*src,
> > > > > > >                                             *buffer.buffer,
> > > > > > > -                                           descriptor->settings_,
> > > > > > > +                                           descriptor.settings_,
> > > > > > >                                             resultMetadata.get());
> > > > > > >             if (ret) {
> > > > > > >                     status = CAMERA3_BUFFER_STATUS_ERROR;
> > > > > > > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> > > > > > >
> > > > > > >     /* Prepare to call back the Android camera stack. */
> > > > > > >     camera3_capture_result_t captureResult = {};
> > > > > > > -   captureResult.frame_number = descriptor->frameNumber_;
> > > > > > > -   captureResult.num_output_buffers = descriptor->buffers_.size();
> > > > > > > -   for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > > > > > > +   captureResult.frame_number = descriptor.frameNumber_;
> > > > > > > +   captureResult.num_output_buffers = descriptor.buffers_.size();
> > > > > > > +   for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> > > > > > >             buffer.acquire_fence = -1;
> > > > > > >             buffer.release_fence = -1;
> > > > > > >             buffer.status = status;
> > > > > > >     }
> > > > > > > -   captureResult.output_buffers = descriptor->buffers_.data();
> > > > > > > +   captureResult.output_buffers = descriptor.buffers_.data();
> > > > > > >
> > > > > > >     if (status == CAMERA3_BUFFER_STATUS_OK) {
> > > > > > > -           notifyShutter(descriptor->frameNumber_, timestamp);
> > > > > > > +           notifyShutter(descriptor.frameNumber_, timestamp);
> > > > > > >
> > > > > > >             captureResult.partial_result = 1;
> > > > > > >             captureResult.result = resultMetadata->get();
> > > > > > > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> > > > > > >              * is here signalled. Make sure the error path plays well with
> > > > > > >              * the camera stack state machine.
> > > > > > >              */
> > > > > > > -           notifyError(descriptor->frameNumber_,
> > > > > > > -                       descriptor->buffers_[0].stream);
> > > > > > > +           notifyError(descriptor.frameNumber_,
> > > > > > > +                       descriptor.buffers_[0].stream);
> > > > > > >     }
> > > > > > >
> > > > > > >     callbacks_->process_capture_result(callbacks_, &captureResult);
> > > > > > > -
> > > > > > > -   delete descriptor;
> > > > > > >  }
> > > > > > >
> > > > > > >  std::string CameraDevice::logPrefix() const
> > > > > > > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > > > > > > index 11bdfec8..54d7c319 100644
> > > > > > > --- a/src/android/camera_device.h
> > > > > > > +++ b/src/android/camera_device.h
> > > > > > > @@ -69,9 +69,11 @@ private:
> > > > > > >     CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> > > > > > >
> > > > > > >     struct Camera3RequestDescriptor {
> > > > > > > +           Camera3RequestDescriptor();
> > > > > > > +           ~Camera3RequestDescriptor();
> > > > > > >             Camera3RequestDescriptor(libcamera::Camera *camera,
> > > > > > >                                      const camera3_capture_request_t *camera3Request);
> > > > > > > -           ~Camera3RequestDescriptor();
> > > > > > > +           Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> > > > > > >
> > > > > > >             uint32_t frameNumber_;
> > > > > > >             std::vector<camera3_stream_buffer_t> buffers_;
> > > > > > > @@ -122,6 +124,8 @@ private:
> > > > > > >     std::map<int, libcamera::PixelFormat> formatsMap_;
> > > > > > >     std::vector<CameraStream> streams_;
> > > > > > >
> > > > > > > +   std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > > > > > > +
> > > > > > >     std::string maker_;
> > > > > > >     std::string model_;
> > > > > > >
> > > > > > > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > > > > > > index df436460..300ddde0 100644
> > > > > > > --- a/src/android/camera_worker.cpp
> > > > > > > +++ b/src/android/camera_worker.cpp
> > > > > > > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> > > > > > >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> > > > > > >   * fences.
> > > > > > >   */
> > > > > > > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > > > > > > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> > > > > > >     : camera_(camera)
> > > > > > >  {
> > > > > > > -   request_ = camera_->createRequest(cookie);
> > > > > > > +   request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> > > > > > >  }
> > > > > > >
> > > > > > >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > > > > > > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > > > > > > index d7060576..64b1658b 100644
> > > > > > > --- a/src/android/camera_worker.h
> > > > > > > +++ b/src/android/camera_worker.h
> > > > > > > @@ -22,7 +22,7 @@ class CameraDevice;
> > > > > > >  class CaptureRequest
> > > > > > >  {
> > > > > > >  public:
> > > > > > > -   CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > > > > > > +   CaptureRequest(libcamera::Camera *camera);
> > > > > > >
> > > > > > >     const std::vector<int> &fences() const { return acquireFences_; }
> > > > > > >     libcamera::ControlList &controls() { return request_->controls(); }
>
> --
> Regards,
>
> Laurent Pinchart

Patch
diff mbox series

diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
index ae693664..50b017a3 100644
--- a/src/android/camera_device.cpp
+++ b/src/android/camera_device.cpp
@@ -287,15 +287,17 @@  CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(

 	/*
 	 * Create the libcamera::Request unique_ptr<> to tie its lifetime
-	 * to the descriptor's one. Set the descriptor's address as the
-	 * request's cookie to retrieve it at completion time.
+	 * to the descriptor's one.
 	 */
-	request_ = std::make_unique<CaptureRequest>(camera,
-						    reinterpret_cast<uint64_t>(this));
+	request_ = std::make_unique<CaptureRequest>(camera);
 }

+CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
 CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;

+CameraDevice::Camera3RequestDescriptor&
+CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
+
 /*
  * \class CameraDevice
  *
@@ -1811,8 +1813,8 @@  int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
 	 * The descriptor and the associated memory reserved here are freed
 	 * at request complete time.
 	 */
-	Camera3RequestDescriptor *descriptor =
-		new Camera3RequestDescriptor(camera_.get(), camera3Request);
+	Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
+
 	/*
 	 * \todo The Android request model is incremental, settings passed in
 	 * previous requests are to be effective until overridden explicitly in
@@ -1822,12 +1824,12 @@  int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
 	if (camera3Request->settings)
 		lastSettings_ = camera3Request->settings;
 	else
-		descriptor->settings_ = lastSettings_;
+		descriptor.settings_ = lastSettings_;

-	LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
-			<< " with " << descriptor->buffers_.size() << " streams";
-	for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
-		const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
+	LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
+			<< " with " << descriptor.buffers_.size() << " streams";
+	for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
+		const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
 		camera3_stream *camera3Stream = camera3Buffer.stream;
 		CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);

@@ -1860,7 +1862,7 @@  int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
 			 * lifetime management only.
 			 */
 			buffer = createFrameBuffer(*camera3Buffer.buffer);
-			descriptor->frameBuffers_.emplace_back(buffer);
+			descriptor.frameBuffers_.emplace_back(buffer);
 			LOG(HAL, Debug) << ss.str() << " (direct)";
 			break;

@@ -1879,11 +1881,10 @@  int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques

 		if (!buffer) {
 			LOG(HAL, Error) << "Failed to create buffer";
-			delete descriptor;
 			return -ENOMEM;
 		}

-		descriptor->request_->addBuffer(cameraStream->stream(), buffer,
+		descriptor.request_->addBuffer(cameraStream->stream(), buffer,
 						camera3Buffer.acquire_fence);
 	}

@@ -1891,11 +1892,12 @@  int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
 	 * Translate controls from Android to libcamera and queue the request
 	 * to the CameraWorker thread.
 	 */
-	int ret = processControls(descriptor);
+	int ret = processControls(&descriptor);
 	if (ret)
 		return ret;

-	worker_.queueRequest(descriptor->request_.get());
+	worker_.queueRequest(descriptor.request_.get());
+	descriptors_[descriptor.request_->cookie()] = std::move(descriptor);

 	return 0;
 }
@@ -1905,8 +1907,13 @@  void CameraDevice::requestComplete(Request *request)
 	const Request::BufferMap &buffers = request->buffers();
 	camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
 	std::unique_ptr<CameraMetadata> resultMetadata;
-	Camera3RequestDescriptor *descriptor =
-		reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
+	auto node = descriptors_.extract(request->cookie());
+	if (node.empty()) {
+		LOG(HAL, Error) << "Unknown request: " << request->cookie();
+		status = CAMERA3_BUFFER_STATUS_ERROR;
+		return;
+	}
+	Camera3RequestDescriptor& descriptor = node.mapped();

 	if (request->status() != Request::RequestComplete) {
 		LOG(HAL, Error) << "Request not successfully completed: "
@@ -1915,7 +1922,7 @@  void CameraDevice::requestComplete(Request *request)
 	}

 	LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
-			<< descriptor->buffers_.size() << " streams";
+			<< descriptor.buffers_.size() << " streams";

 	/*
 	 * \todo The timestamp used for the metadata is currently always taken
@@ -1924,10 +1931,10 @@  void CameraDevice::requestComplete(Request *request)
 	 * pipeline handlers) timestamp in the Request itself.
 	 */
 	uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
-	resultMetadata = getResultMetadata(*descriptor, timestamp);
+	resultMetadata = getResultMetadata(descriptor, timestamp);

 	/* Handle any JPEG compression. */
-	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
+	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
 		CameraStream *cameraStream =
 			static_cast<CameraStream *>(buffer.stream->priv);

@@ -1942,7 +1949,7 @@  void CameraDevice::requestComplete(Request *request)

 		int ret = cameraStream->process(*src,
 						*buffer.buffer,
-						descriptor->settings_,
+						descriptor.settings_,
 						resultMetadata.get());
 		if (ret) {
 			status = CAMERA3_BUFFER_STATUS_ERROR;
@@ -1959,17 +1966,17 @@  void CameraDevice::requestComplete(Request *request)

 	/* Prepare to call back the Android camera stack. */
 	camera3_capture_result_t captureResult = {};
-	captureResult.frame_number = descriptor->frameNumber_;
-	captureResult.num_output_buffers = descriptor->buffers_.size();
-	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
+	captureResult.frame_number = descriptor.frameNumber_;
+	captureResult.num_output_buffers = descriptor.buffers_.size();
+	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
 		buffer.acquire_fence = -1;
 		buffer.release_fence = -1;
 		buffer.status = status;
 	}
-	captureResult.output_buffers = descriptor->buffers_.data();
+	captureResult.output_buffers = descriptor.buffers_.data();

 	if (status == CAMERA3_BUFFER_STATUS_OK) {
-		notifyShutter(descriptor->frameNumber_, timestamp);
+		notifyShutter(descriptor.frameNumber_, timestamp);

 		captureResult.partial_result = 1;
 		captureResult.result = resultMetadata->get();
@@ -1982,13 +1989,11 @@  void CameraDevice::requestComplete(Request *request)
 		 * is here signalled. Make sure the error path plays well with
 		 * the camera stack state machine.
 		 */
-		notifyError(descriptor->frameNumber_,
-			    descriptor->buffers_[0].stream);
+		notifyError(descriptor.frameNumber_,
+			    descriptor.buffers_[0].stream);
 	}

 	callbacks_->process_capture_result(callbacks_, &captureResult);
-
-	delete descriptor;
 }

 std::string CameraDevice::logPrefix() const
diff --git a/src/android/camera_device.h b/src/android/camera_device.h
index 11bdfec8..54d7c319 100644
--- a/src/android/camera_device.h
+++ b/src/android/camera_device.h
@@ -69,9 +69,11 @@  private:
 	CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);

 	struct Camera3RequestDescriptor {
+		Camera3RequestDescriptor();
+		~Camera3RequestDescriptor();
 		Camera3RequestDescriptor(libcamera::Camera *camera,
 					 const camera3_capture_request_t *camera3Request);
-		~Camera3RequestDescriptor();
+		Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);

 		uint32_t frameNumber_;
 		std::vector<camera3_stream_buffer_t> buffers_;
@@ -122,6 +124,8 @@  private:
 	std::map<int, libcamera::PixelFormat> formatsMap_;
 	std::vector<CameraStream> streams_;

+	std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
+
 	std::string maker_;
 	std::string model_;

diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
index df436460..300ddde0 100644
--- a/src/android/camera_worker.cpp
+++ b/src/android/camera_worker.cpp
@@ -26,10 +26,10 @@  LOG_DECLARE_CATEGORY(HAL)
  * by the CameraWorker which queues it to the libcamera::Camera after handling
  * fences.
  */
-CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
+CaptureRequest::CaptureRequest(libcamera::Camera *camera)
 	: camera_(camera)
 {
-	request_ = camera_->createRequest(cookie);
+	request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
 }

 void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
index d7060576..64b1658b 100644
--- a/src/android/camera_worker.h
+++ b/src/android/camera_worker.h
@@ -22,7 +22,7 @@  class CameraDevice;
 class CaptureRequest
 {
 public:
-	CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
+	CaptureRequest(libcamera::Camera *camera);

 	const std::vector<int> &fences() const { return acquireFences_; }
 	libcamera::ControlList &controls() { return request_->controls(); }