[{"id":23650,"web_url":"https://patchwork.libcamera.org/comment/23650/","msgid":"<ede598c6fd7c24928761b65ef3848be0c072b0de.camel@collabora.com>","date":"2022-06-28T13:50:41","subject":"Re: [libcamera-devel] [PATCH 13/13] gstreamer: Fix race conditions\n\tin task pause/resume","submitter":{"id":31,"url":"https://patchwork.libcamera.org/api/people/31/","name":"Nicolas Dufresne","email":"nicolas.dufresne@collabora.com"},"content":"Hi Laurent,\n\nthis one is difficult to follow, see some idea below.\n\nLe vendredi 24 juin 2022 à 02:22 +0300, Laurent Pinchart a écrit :\n> The task run function races with two other threads that want to resume\n> the task: the requestCompleted() handler and the buffer-notify signal\n> handler. If the former queues completed requests or the latter queues\n> back buffers to the pool, and then resume the task, after the task run\n> handler checks the queues but before it attemps to pause the task, then\n> the task may be paused without noticing that more work is available.\n> \n> The most immediate way to fix this is to take the stream_lock in the\n> requestCompleted() and buffer-notify signal handlers, or cover the whole\n> task run handler with the GstLibcameraSrcState lock. This could cause\n> long delays in the requestCompleted() handler, so that's not a good\n> option.\n> \n> Instead, add a wakeup flag, preotected by the GstLibcameraSrcState lock,\n> that allows detection of a lost race, and retry the task run.\n> \n> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>\n> ---\n>  src/gstreamer/gstlibcamerasrc.cpp | 82 +++++++++++++++++++++++--------\n>  1 file changed, 62 insertions(+), 20 deletions(-)\n> \n> diff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp\n> index 3feb87254916..59400f17ae85 100644\n> --- a/src/gstreamer/gstlibcamerasrc.cpp\n> +++ b/src/gstreamer/gstlibcamerasrc.cpp\n> @@ -120,6 +120,7 @@ struct GstLibcameraSrcState {\n>  \t\tLIBCAMERA_TSA_GUARDED_BY(lock_);\n>  \tstd::queue<std::unique_ptr<RequestWrap>> completedRequests_\n>  \t\tLIBCAMERA_TSA_GUARDED_BY(lock_);\n> +\tbool wakeup_ LIBCAMERA_TSA_GUARDED_BY(lock_);\n>  \n>  \tguint group_id_;\n>  \n> @@ -237,14 +238,16 @@ GstLibcameraSrcState::requestCompleted(Request *request)\n>  \t{\n>  \t\tMutexLocker locker(lock_);\n>  \t\tcompletedRequests_.push(std::move(wrap));\n> -\t}\n> +\t\twakeup_ = true;\n>  \n> -\tgst_task_resume(src_->task);\n> +\t\tgst_task_resume(src_->task);\n> +\t}\n>  }\n>  \n>  int GstLibcameraSrcState::processRequest()\n>  {\n>  \tstd::unique_ptr<RequestWrap> wrap;\n> +\tint err = 0;\n>  \n>  \t{\n>  \t\tMutexLocker locker(lock_);\n> @@ -253,10 +256,13 @@ int GstLibcameraSrcState::processRequest()\n>  \t\t\twrap = std::move(completedRequests_.front());\n>  \t\t\tcompletedRequests_.pop();\n>  \t\t}\n> +\n> +\t\tif (completedRequests_.empty())\n> +\t\t\terr = -ENOBUFS;\n>  \t}\n>  \n>  \tif (!wrap)\n> -\t\treturn -ENODATA;\n> +\t\treturn -ENOBUFS;\n>  \n>  \tGstFlowReturn ret = GST_FLOW_OK;\n>  \tgst_flow_combiner_reset(src_->flow_combiner);\n> @@ -296,7 +302,7 @@ int GstLibcameraSrcState::processRequest()\n>  \t\treturn -EPIPE;\n>  \t}\n>  \n> -\treturn 0;\n> +\treturn err;\n>  }\n>  \n>  static bool\n> @@ -360,53 +366,88 @@ gst_libcamera_src_open(GstLibcameraSrc *self)\n>  \treturn true;\n>  }\n>  \n> +static void\n> +gst_libcamera_src_task_resume(gpointer user_data)\n> +{\n> +\tGstLibcameraSrc *self = GST_LIBCAMERA_SRC(user_data);\n> +\tGstLibcameraSrcState *state = self->state;\n> +\n> +\tMutexLocker locker(state->lock_);\n> +\tstate->wakeup_ = true;\n> +\tgst_task_resume(self->task);\n> +}\n> +\n>  static void\n>  gst_libcamera_src_task_run(gpointer user_data)\n>  {\n>  \tGstLibcameraSrc *self = GST_LIBCAMERA_SRC(user_data);\n>  \tGstLibcameraSrcState *state = self->state;\n>  \n> +\t{\n> +\t\tMutexLocker locker(state->lock_);\n> +\t\tstate->wakeup_ = true;\n> +\t}\n> +\n> +\tbool doPause = true;\n> +\n>  \t/*\n>  \t * Create and queue one request. If no buffers are available the\n>  \t * function returns -ENOBUFS, which we ignore here as that's not a\n>  \t * fatal error.\n>  \t */\n>  \tint ret = state->queueRequest();\n> -\tif (ret == -ENOMEM) {\n> +\tswitch (ret) {\n> +\tcase 0:\n> +\t\t/*\n> +\t\t * The request was successfully queued, there may be enough\n> +\t\t * buffers to create a new one. Don't pause the task to give it\n> +\t\t * another try.\n> +\t\t */\n> +\t\tdoPause = false;\n> +\t\tbreak;\n> +\n> +\tcase -ENOMEM:\n>  \t\tGST_ELEMENT_ERROR(self, RESOURCE, NO_SPACE_LEFT,\n>  \t\t\t\t  (\"Failed to allocate request for camera '%s'.\",\n>  \t\t\t\t   state->cam_->id().c_str()),\n>  \t\t\t\t  (\"libcamera::Camera::createRequest() failed\"));\n>  \t\tgst_task_stop(self->task);\n>  \t\treturn;\n> +\n> +\tcase -ENOBUFS:\n> +\tdefault:\n> +\t\tbreak;\n>  \t}\n>  \n> -\t/* Process one completed request, if available. */\n> +\t/*\n> +\t * Process one completed request, if available, and record if further\n> +\t * requests are ready for processing.\n> +\t */\n>  \tret = state->processRequest();\n>  \tswitch (ret) {\n> +\tcase -ENOBUFS:\n> +\t\tdoPause = false;\n> +\t\tbreak;\n> +\n>  \tcase -EPIPE:\n>  \t\tgst_task_stop(self->task);\n>  \t\treturn;\n>  \n> -\tcase -ENODATA:\n> -\t\tgst_task_pause(self->task);\n> -\t\treturn;\n> +\tcase 0:\n> +\tdefault:\n> +\t\tbreak;\n>  \t}\n>  \n>  \t/*\n> -\t * Here we need to decide if we want to pause. This needs to\n> -\t * happen in lock step with the callback thread which may want\n> -\t * to resume the task and might push pending buffers.\n> +\t * Here we need to decide if we want to pause. This needs to happen in\n> +\t * lock step with the requestCompleted callback and the buffer-notify\n> +\t * signal handler that resume the task.\n>  \t */\n> -\tbool do_pause;\n> -\n> -\t{\n> +\tif (doPause) {\n>  \t\tMutexLocker locker(state->lock_);\n> -\t\tdo_pause = state->completedRequests_.empty();\n> +\t\tif (!state->wakeup_)\n> +\t\t\tgst_task_pause(self->task);\n\nThis dance is difficult to follow. Perhaps we could start the run function by\n\"pausing\" the task? Then we will resume the task if state->processRequest() ==\n0, or concurrently if a request completed. Using a doResume bool instead of\ndoPause.\n\n>  \t}\n> -\n> -\tif (do_pause)\n> -\t\tgst_task_pause(self->task);\n>  }\n>  \n>  static void\n> @@ -517,7 +558,8 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,\n>  \t\tGstLibcameraPool *pool = gst_libcamera_pool_new(self->allocator,\n>  \t\t\t\t\t\t\t\tstream_cfg.stream());\n>  \t\tg_signal_connect_swapped(pool, \"buffer-notify\",\n> -\t\t\t\t\t G_CALLBACK(gst_task_resume), task);\n> +\t\t\t\t\t G_CALLBACK(gst_libcamera_src_task_resume),\n> +\t\t\t\t\t self);\n>  \n>  \t\tgst_libcamera_pad_set_pool(srcpad, pool);\n>  \t\tgst_flow_combiner_add_pad(self->flow_combiner, srcpad);","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id CBAA7BE173\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 28 Jun 2022 13:50:52 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 3835965631;\n\tTue, 28 Jun 2022 15:50:52 +0200 (CEST)","from madras.collabora.co.uk (madras.collabora.co.uk\n\t[46.235.227.172])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 39C536059D\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 28 Jun 2022 15:50:50 +0200 (CEST)","from nicolas-tpx395.localdomain (192-222-136-102.qc.cable.ebox.net\n\t[192.222.136.102])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\tkey-exchange X25519 server-signature RSA-PSS (4096 bits)\n\tserver-digest SHA256)\n\t(No client certificate requested) (Authenticated sender: nicolas)\n\tby madras.collabora.co.uk (Postfix) with ESMTPSA id 6569166015C8;\n\tTue, 28 Jun 2022 14:50:49 +0100 (BST)"],"DKIM-Signature":["v=1; a=rsa-sha256; c=relaxed/simple; d=libcamera.org;\n\ts=mail; t=1656424252;\n\tbh=jAH1022hwsxvLmKmdn7/ICYk921AQN2LBTYj756+LRM=;\n\th=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe:\n\tList-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc:\n\tFrom;\n\tb=sxytz948/edD7Vy6UFDLrwgi3mPDeDR13+q5Ip+arpv0Y/D1ti/5c9I/cqTHxBA61\n\tHgelgwnvUYIEM6eaPUw6PHvijcfvLIbIksmnVZLy0GCySo9kUrRzzG/IJLStuyJ35o\n\tA1/wNwatqowGyfj2Xrwj24OEHVitUvk2vXWWLSuvfpWrwReJz4hD5FUkw2phtXUyKG\n\tlRS1SVff/95LXQluXjHtLwrb4keodBUsGjk1fXXiSM3MZStILnfX/4WsrEgMP2NIpn\n\tKt98n4MrEAmvTxxMulXNA17l49SG63FUySCQ9uxaUDUWTj/9n7D9cKfG9ykTkJAe49\n\tgN6BMnq2ibHaA==","v=1; a=rsa-sha256; c=relaxed/simple; d=collabora.com;\n\ts=mail; t=1656424249;\n\tbh=jAH1022hwsxvLmKmdn7/ICYk921AQN2LBTYj756+LRM=;\n\th=Subject:From:To:Cc:Date:In-Reply-To:References:From;\n\tb=TFpQi2VFqK9bjtQ2C1xQYYtcHQ6zO71cZw2OgM61eXEC10lVo3e6xGUUQL1on2OQi\n\ten0xBlaZPPNSm6+xYh8egyHfGfxSi+I3wKMoONqMNGyB0Rh6m1PEiaBIufDeGL1OET\n\tqgTxuqGOb7EC8tJXwqC+ZGsK+TibEL2pPV7J48SHORrT1WULGtQXjgvrd7IlppDFJ1\n\t5hr4Y0R8zdh29Z17/LbK9MLfXQ5DIMcqAEsIyrzl8dmbuB740pQU5txdxGGq8oyxiW\n\taxA/rZZIkLaxPle/lv4zj2wwXdGZu3f9/GVXx3nHfP2nsrCB816nO51s30/tn6YEot\n\tJgT2bUyamFtYg=="],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (2048-bit key; \n\tunprotected) header.d=collabora.com\n\theader.i=@collabora.com\n\theader.b=\"TFpQi2VF\"; dkim-atps=neutral","Message-ID":"<ede598c6fd7c24928761b65ef3848be0c072b0de.camel@collabora.com>","To":"Laurent Pinchart <laurent.pinchart@ideasonboard.com>, \n\tlibcamera-devel@lists.libcamera.org","Date":"Tue, 28 Jun 2022 09:50:41 -0400","In-Reply-To":"<20220623232210.18742-14-laurent.pinchart@ideasonboard.com>","References":"<20220623232210.18742-1-laurent.pinchart@ideasonboard.com>\n\t<20220623232210.18742-14-laurent.pinchart@ideasonboard.com>","Content-Type":"text/plain; charset=\"UTF-8\"","Content-Transfer-Encoding":"quoted-printable","User-Agent":"Evolution 3.44.2 (3.44.2-1.fc36) ","MIME-Version":"1.0","Subject":"Re: [libcamera-devel] [PATCH 13/13] gstreamer: Fix race conditions\n\tin task pause/resume","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","From":"Nicolas Dufresne via libcamera-devel\n\t<libcamera-devel@lists.libcamera.org>","Reply-To":"Nicolas Dufresne <nicolas.dufresne@collabora.com>","Cc":"Vedant Paranjape <vedantparanjape160201@gmail.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":23666,"web_url":"https://patchwork.libcamera.org/comment/23666/","msgid":"<YrzlMT1Z483aLx29@pendragon.ideasonboard.com>","date":"2022-06-29T23:50:09","subject":"Re: [libcamera-devel] [PATCH 13/13] gstreamer: Fix race conditions\n\tin task pause/resume","submitter":{"id":2,"url":"https://patchwork.libcamera.org/api/people/2/","name":"Laurent Pinchart","email":"laurent.pinchart@ideasonboard.com"},"content":"Hi Nicolas,\n\nOn Tue, Jun 28, 2022 at 09:50:41AM -0400, Nicolas Dufresne wrote:\n> Hi Laurent,\n> \n> this one is difficult to follow, see some idea below.\n\nThat matches the experience I had writing it :-)\n\n> Le vendredi 24 juin 2022 à 02:22 +0300, Laurent Pinchart a écrit :\n> > The task run function races with two other threads that want to resume\n> > the task: the requestCompleted() handler and the buffer-notify signal\n> > handler. If the former queues completed requests or the latter queues\n> > back buffers to the pool, and then resume the task, after the task run\n> > handler checks the queues but before it attemps to pause the task, then\n> > the task may be paused without noticing that more work is available.\n> > \n> > The most immediate way to fix this is to take the stream_lock in the\n> > requestCompleted() and buffer-notify signal handlers, or cover the whole\n> > task run handler with the GstLibcameraSrcState lock. This could cause\n> > long delays in the requestCompleted() handler, so that's not a good\n> > option.\n> > \n> > Instead, add a wakeup flag, preotected by the GstLibcameraSrcState lock,\n> > that allows detection of a lost race, and retry the task run.\n> > \n> > Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>\n> > ---\n> >  src/gstreamer/gstlibcamerasrc.cpp | 82 +++++++++++++++++++++++--------\n> >  1 file changed, 62 insertions(+), 20 deletions(-)\n> > \n> > diff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp\n> > index 3feb87254916..59400f17ae85 100644\n> > --- a/src/gstreamer/gstlibcamerasrc.cpp\n> > +++ b/src/gstreamer/gstlibcamerasrc.cpp\n> > @@ -120,6 +120,7 @@ struct GstLibcameraSrcState {\n> >  \t\tLIBCAMERA_TSA_GUARDED_BY(lock_);\n> >  \tstd::queue<std::unique_ptr<RequestWrap>> completedRequests_\n> >  \t\tLIBCAMERA_TSA_GUARDED_BY(lock_);\n> > +\tbool wakeup_ LIBCAMERA_TSA_GUARDED_BY(lock_);\n> >  \n> >  \tguint group_id_;\n> >  \n> > @@ -237,14 +238,16 @@ GstLibcameraSrcState::requestCompleted(Request *request)\n> >  \t{\n> >  \t\tMutexLocker locker(lock_);\n> >  \t\tcompletedRequests_.push(std::move(wrap));\n> > -\t}\n> > +\t\twakeup_ = true;\n> >  \n> > -\tgst_task_resume(src_->task);\n> > +\t\tgst_task_resume(src_->task);\n> > +\t}\n> >  }\n> >  \n> >  int GstLibcameraSrcState::processRequest()\n> >  {\n> >  \tstd::unique_ptr<RequestWrap> wrap;\n> > +\tint err = 0;\n> >  \n> >  \t{\n> >  \t\tMutexLocker locker(lock_);\n> > @@ -253,10 +256,13 @@ int GstLibcameraSrcState::processRequest()\n> >  \t\t\twrap = std::move(completedRequests_.front());\n> >  \t\t\tcompletedRequests_.pop();\n> >  \t\t}\n> > +\n> > +\t\tif (completedRequests_.empty())\n> > +\t\t\terr = -ENOBUFS;\n> >  \t}\n> >  \n> >  \tif (!wrap)\n> > -\t\treturn -ENODATA;\n> > +\t\treturn -ENOBUFS;\n> >  \n> >  \tGstFlowReturn ret = GST_FLOW_OK;\n> >  \tgst_flow_combiner_reset(src_->flow_combiner);\n> > @@ -296,7 +302,7 @@ int GstLibcameraSrcState::processRequest()\n> >  \t\treturn -EPIPE;\n> >  \t}\n> >  \n> > -\treturn 0;\n> > +\treturn err;\n> >  }\n> >  \n> >  static bool\n> > @@ -360,53 +366,88 @@ gst_libcamera_src_open(GstLibcameraSrc *self)\n> >  \treturn true;\n> >  }\n> >  \n> > +static void\n> > +gst_libcamera_src_task_resume(gpointer user_data)\n> > +{\n> > +\tGstLibcameraSrc *self = GST_LIBCAMERA_SRC(user_data);\n> > +\tGstLibcameraSrcState *state = self->state;\n> > +\n> > +\tMutexLocker locker(state->lock_);\n> > +\tstate->wakeup_ = true;\n> > +\tgst_task_resume(self->task);\n> > +}\n> > +\n> >  static void\n> >  gst_libcamera_src_task_run(gpointer user_data)\n> >  {\n> >  \tGstLibcameraSrc *self = GST_LIBCAMERA_SRC(user_data);\n> >  \tGstLibcameraSrcState *state = self->state;\n> >  \n> > +\t{\n> > +\t\tMutexLocker locker(state->lock_);\n> > +\t\tstate->wakeup_ = true;\n\nLooks like there's a bug here, wakeup_ needs to be set to false. It's\nalways true otherwise.\n\n> > +\t}\n> > +\n> > +\tbool doPause = true;\n> > +\n> >  \t/*\n> >  \t * Create and queue one request. If no buffers are available the\n> >  \t * function returns -ENOBUFS, which we ignore here as that's not a\n> >  \t * fatal error.\n> >  \t */\n> >  \tint ret = state->queueRequest();\n> > -\tif (ret == -ENOMEM) {\n> > +\tswitch (ret) {\n> > +\tcase 0:\n> > +\t\t/*\n> > +\t\t * The request was successfully queued, there may be enough\n> > +\t\t * buffers to create a new one. Don't pause the task to give it\n> > +\t\t * another try.\n> > +\t\t */\n> > +\t\tdoPause = false;\n> > +\t\tbreak;\n> > +\n> > +\tcase -ENOMEM:\n> >  \t\tGST_ELEMENT_ERROR(self, RESOURCE, NO_SPACE_LEFT,\n> >  \t\t\t\t  (\"Failed to allocate request for camera '%s'.\",\n> >  \t\t\t\t   state->cam_->id().c_str()),\n> >  \t\t\t\t  (\"libcamera::Camera::createRequest() failed\"));\n> >  \t\tgst_task_stop(self->task);\n> >  \t\treturn;\n> > +\n> > +\tcase -ENOBUFS:\n> > +\tdefault:\n> > +\t\tbreak;\n> >  \t}\n> >  \n> > -\t/* Process one completed request, if available. */\n> > +\t/*\n> > +\t * Process one completed request, if available, and record if further\n> > +\t * requests are ready for processing.\n> > +\t */\n> >  \tret = state->processRequest();\n> >  \tswitch (ret) {\n> > +\tcase -ENOBUFS:\n> > +\t\tdoPause = false;\n\nAnd here too, this needs to go to case 0.\n\n> > +\t\tbreak;\n> > +\n> >  \tcase -EPIPE:\n> >  \t\tgst_task_stop(self->task);\n> >  \t\treturn;\n> >  \n> > -\tcase -ENODATA:\n> > -\t\tgst_task_pause(self->task);\n> > -\t\treturn;\n> > +\tcase 0:\n> > +\tdefault:\n> > +\t\tbreak;\n> >  \t}\n> >  \n> >  \t/*\n> > -\t * Here we need to decide if we want to pause. This needs to\n> > -\t * happen in lock step with the callback thread which may want\n> > -\t * to resume the task and might push pending buffers.\n> > +\t * Here we need to decide if we want to pause. This needs to happen in\n> > +\t * lock step with the requestCompleted callback and the buffer-notify\n> > +\t * signal handler that resume the task.\n> >  \t */\n> > -\tbool do_pause;\n> > -\n> > -\t{\n> > +\tif (doPause) {\n> >  \t\tMutexLocker locker(state->lock_);\n> > -\t\tdo_pause = state->completedRequests_.empty();\n> > +\t\tif (!state->wakeup_)\n> > +\t\t\tgst_task_pause(self->task);\n> \n> This dance is difficult to follow. Perhaps we could start the run function by\n> \"pausing\" the task? Then we will resume the task if state->processRequest() ==\n> 0, or concurrently if a request completed. Using a doResume bool instead of\n> doPause.\n\nI've given it a try, and the result is as follows:\n\ndiff --git a/src/gstreamer/gstlibcamerasrc.cpp b/src/gstreamer/gstlibcamerasrc.cpp\nindex 6ee315cf5efe..324ea457eb4f 100644\n--- a/src/gstreamer/gstlibcamerasrc.cpp\n+++ b/src/gstreamer/gstlibcamerasrc.cpp\n@@ -411,7 +411,9 @@ gst_libcamera_src_task_run(gpointer user_data)\n \t\tstate->wakeup_ = false;\n \t}\n \n-\tbool doPause = true;\n+\tbool doResume = false;\n+\n+\tgst_task_pause(self->task);\n \n \t/*\n \t * Create and queue one request. If no buffers are available the\n@@ -426,7 +428,7 @@ gst_libcamera_src_task_run(gpointer user_data)\n \t\t * buffers to create a new one. Don't pause the task to give it\n \t\t * another try.\n \t\t */\n-\t\tdoPause = false;\n+\t\tdoResume = true;\n \t\tbreak;\n \n \tcase -ENOMEM:\n@@ -449,11 +451,8 @@ gst_libcamera_src_task_run(gpointer user_data)\n \tret = state->processRequest();\n \tswitch (ret) {\n \tcase 0:\n-\t\t/*\n-\t\t * Another completed request is available, don't pause the\n-\t\t * task.\n-\t\t */\n-\t\tdoPause = false;\n+\t\t/* Another completed request is available, resume the task. */\n+\t\tdoResume = true;\n \t\tbreak;\n \n \tcase -EPIPE:\n@@ -466,13 +466,13 @@ gst_libcamera_src_task_run(gpointer user_data)\n \t}\n \n \t/*\n-\t * Here we need to decide if we want to pause. This needs to happen in\n+\t * Here we need to decide if we want to resume. This needs to happen in\n \t * lock step with the requestCompleted callback and the buffer-notify\n \t * signal handler that resume the task.\n \t */\n-\tif (doPause) {\n+\t{\n \t\tMutexLocker locker(state->lock_);\n-\t\tif (!state->wakeup_)\n-\t\t\tgst_task_pause(self->task);\n+\t\tif (doResume || state->wakeup_)\n+\t\t\tgst_task_resume(self->task);\n \t}\n }\n\nI'm not sure that's what you meant, as it doesn't seem much easier to\nfollow.\n\nWith the mechanism from v1 (and the two fixes mentioned above), the task\nruns ~80 times / second, is resumed ~55 times / second, and paused ~26\ntimes / second. With the opposite logic, we get ~80 pause/second (that's\nexpected, as it's paused at every run) and ~107 resume/second. That\nseems less efficient to me.\n\nI'll use the original mechanism with the bug fixes in v2, if you meant\nsomething else than the above, you can explain it in a reply to the new\npatch.\n\n> >  \t}\n> > -\n> > -\tif (do_pause)\n> > -\t\tgst_task_pause(self->task);\n> >  }\n> >  \n> >  static void\n> > @@ -517,7 +558,8 @@ gst_libcamera_src_task_enter(GstTask *task, [[maybe_unused]] GThread *thread,\n> >  \t\tGstLibcameraPool *pool = gst_libcamera_pool_new(self->allocator,\n> >  \t\t\t\t\t\t\t\tstream_cfg.stream());\n> >  \t\tg_signal_connect_swapped(pool, \"buffer-notify\",\n> > -\t\t\t\t\t G_CALLBACK(gst_task_resume), task);\n> > +\t\t\t\t\t G_CALLBACK(gst_libcamera_src_task_resume),\n> > +\t\t\t\t\t self);\n> >  \n> >  \t\tgst_libcamera_pad_set_pool(srcpad, pool);\n> >  \t\tgst_flow_combiner_add_pad(self->flow_combiner, srcpad);","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 55E84BE173\n\tfor <parsemail@patchwork.libcamera.org>;\n\tWed, 29 Jun 2022 23:50:34 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 652B765635;\n\tThu, 30 Jun 2022 01:50:33 +0200 (CEST)","from perceval.ideasonboard.com (perceval.ideasonboard.com\n\t[IPv6:2001:4b98:dc2:55:216:3eff:fef7:d647])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 5897A6059D\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tThu, 30 Jun 2022 01:50:31 +0200 (CEST)","from pendragon.ideasonboard.com (62-78-145-57.bb.dnainternet.fi\n\t[62.78.145.57])\n\tby perceval.ideasonboard.com (Postfix) with ESMTPSA id 0670259D;\n\tThu, 30 Jun 2022 01:50:29 +0200 (CEST)"],"DKIM-Signature":["v=1; a=rsa-sha256; c=relaxed/simple; d=libcamera.org;\n\ts=mail; t=1656546633;\n\tbh=Kvgz3k00WIjYZV8WqJuCMbOvfiqIuv5D9niTfHTgHxs=;\n\th=Date:To:References:In-Reply-To:Subject:List-Id:List-Unsubscribe:\n\tList-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc:\n\tFrom;\n\tb=ShpjFpNhNj9tpCD1oTRqpopBc/odfy6TazgnhKk/AU2PhwEZCR8YcByF4LK3A6fRi\n\t3jxYd8PZ03AGlhUXqrW1ryfKTtl6oW8Mif391ulMdwPvLq5AAXF+r0cT8vL45Q5HFR\n\tKscbstHdmfcmOSwzaIo0Z/7aUXSqTxzRcLO8bdq4prhJvD8HfnlKrfeyswrHRWSIgv\n\tkI0ujHsXvlQpI/8gODt9oIyduCwnelfgEMqQvjM6N3xlLNlc3R3m2JmTR5k9fxNjsN\n\tRba2/88lIszijYIx0CH5+ZbT17On8l5cXXVC583OHry5uGl3FjV3QhEKDPs9C9io3I\n\tae3KCwxrmfBkw==","v=1; a=rsa-sha256; c=relaxed/simple; d=ideasonboard.com;\n\ts=mail; t=1656546630;\n\tbh=Kvgz3k00WIjYZV8WqJuCMbOvfiqIuv5D9niTfHTgHxs=;\n\th=Date:From:To:Cc:Subject:References:In-Reply-To:From;\n\tb=s4krR3nwUX7KVvBZ3hxE8mu3VRtx7Ey5dHvf62aYT9shg3xt7E/oPt3K2f8psbqq3\n\tc5hcflvonYIQQtVcCXXzm6vWjtTRVeFO4AYbjbI2+CRHDg22ulT66HDbAetFMEuUMM\n\trAczrKmW32KlbYNln70UZG9UvEhFO+oMimJG0C1E="],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key; \n\tunprotected) header.d=ideasonboard.com\n\theader.i=@ideasonboard.com\n\theader.b=\"s4krR3nw\"; dkim-atps=neutral","Date":"Thu, 30 Jun 2022 02:50:09 +0300","To":"Nicolas Dufresne <nicolas.dufresne@collabora.com>","Message-ID":"<YrzlMT1Z483aLx29@pendragon.ideasonboard.com>","References":"<20220623232210.18742-1-laurent.pinchart@ideasonboard.com>\n\t<20220623232210.18742-14-laurent.pinchart@ideasonboard.com>\n\t<ede598c6fd7c24928761b65ef3848be0c072b0de.camel@collabora.com>","MIME-Version":"1.0","Content-Type":"text/plain; charset=utf-8","Content-Disposition":"inline","Content-Transfer-Encoding":"8bit","In-Reply-To":"<ede598c6fd7c24928761b65ef3848be0c072b0de.camel@collabora.com>","Subject":"Re: [libcamera-devel] [PATCH 13/13] gstreamer: Fix race conditions\n\tin task pause/resume","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","From":"Laurent Pinchart via libcamera-devel\n\t<libcamera-devel@lists.libcamera.org>","Reply-To":"Laurent Pinchart <laurent.pinchart@ideasonboard.com>","Cc":"libcamera-devel@lists.libcamera.org,\n\tVedant Paranjape <vedantparanjape160201@gmail.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":23688,"web_url":"https://patchwork.libcamera.org/comment/23688/","msgid":"<00c13ee164ec9cbfad8ca0955252b914cdfbf87e.camel@collabora.com>","date":"2022-06-30T19:32:05","subject":"Re: [libcamera-devel] [PATCH 13/13] gstreamer: Fix race conditions\n\tin task pause/resume","submitter":{"id":31,"url":"https://patchwork.libcamera.org/api/people/31/","name":"Nicolas Dufresne","email":"nicolas.dufresne@collabora.com"},"content":"Le jeudi 30 juin 2022 à 02:50 +0300, Laurent Pinchart a écrit :\n> > This dance is difficult to follow. Perhaps we could start the run function\n> > by\n> > \"pausing\" the task? Then we will resume the task if state->processRequest()\n> > ==\n> > 0, or concurrently if a request completed. Using a doResume bool instead of\n> > doPause.\n> \n> I've given it a try, and the result is as follows:\n> \n> diff --git a/src/gstreamer/gstlibcamerasrc.cpp\n> b/src/gstreamer/gstlibcamerasrc.cpp\n> index 6ee315cf5efe..324ea457eb4f 100644\n> --- a/src/gstreamer/gstlibcamerasrc.cpp\n> +++ b/src/gstreamer/gstlibcamerasrc.cpp\n> @@ -411,7 +411,9 @@ gst_libcamera_src_task_run(gpointer user_data)\n>  \t\tstate->wakeup_ = false;\n>  \t}\n>  \n> -\tbool doPause = true;\n> +\tbool doResume = false;\n> +\n> +\tgst_task_pause(self->task);\n>  \n>  \t/*\n>  \t * Create and queue one request. If no buffers are available the\n> @@ -426,7 +428,7 @@ gst_libcamera_src_task_run(gpointer user_data)\n>  \t\t * buffers to create a new one. Don't pause the task to give\n> it\n>  \t\t * another try.\n>  \t\t */\n> -\t\tdoPause = false;\n> +\t\tdoResume = true;\n>  \t\tbreak;\n>  \n>  \tcase -ENOMEM:\n> @@ -449,11 +451,8 @@ gst_libcamera_src_task_run(gpointer user_data)\n>  \tret = state->processRequest();\n>  \tswitch (ret) {\n>  \tcase 0:\n> -\t\t/*\n> -\t\t * Another completed request is available, don't pause the\n> -\t\t * task.\n> -\t\t */\n> -\t\tdoPause = false;\n> +\t\t/* Another completed request is available, resume the task.\n> */\n> +\t\tdoResume = true;\n>  \t\tbreak;\n>  \n>  \tcase -EPIPE:\n> @@ -466,13 +466,13 @@ gst_libcamera_src_task_run(gpointer user_data)\n>  \t}\n>  \n>  \t/*\n> -\t * Here we need to decide if we want to pause. This needs to happen\n> in\n> +\t * Here we need to decide if we want to resume. This needs to happen\n> in\n>  \t * lock step with the requestCompleted callback and the buffer-notify\n>  \t * signal handler that resume the task.\n>  \t */\n> -\tif (doPause) {\n> +\t{\n>  \t\tMutexLocker locker(state->lock_);\n> -\t\tif (!state->wakeup_)\n> -\t\t\tgst_task_pause(self->task);\n> +\t\tif (doResume || state->wakeup_)\n> +\t\t\tgst_task_resume(self->task);\n>  \t}\n>  }\n> \n> I'm not sure that's what you meant, as it doesn't seem much easier to\n> follow.\n> \n> With the mechanism from v1 (and the two fixes mentioned above), the task\n> runs ~80 times / second, is resumed ~55 times / second, and paused ~26\n> times / second. With the opposite logic, we get ~80 pause/second (that's\n> expected, as it's paused at every run) and ~107 resume/second. That\n> seems less efficient to me.\n> \n> I'll use the original mechanism with the bug fixes in v2, if you meant\n> something else than the above, you can explain it in a reply to the new\n> patch.\n\nYou don't need state->wakeup_ if you use the suggested resume technique, making\nthe fix less invasive. The reason is run() function is the only function that\nmay pause the task. Getting an external caller to resume will just lead to run()\nto pause the task again. So you don't have to track what is happening in the\noutside world. Other threads just resume the task, and the run() function\nassumes it will be paused, and then resume() itself as needed.\n\nNicolas","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id B75DABD808\n\tfor <parsemail@patchwork.libcamera.org>;\n\tThu, 30 Jun 2022 19:32:16 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 268B86564F;\n\tThu, 30 Jun 2022 21:32:16 +0200 (CEST)","from madras.collabora.co.uk (madras.collabora.co.uk\n\t[IPv6:2a00:1098:0:82:1000:25:2eeb:e5ab])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 5553065633\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tThu, 30 Jun 2022 21:32:15 +0200 (CEST)","from nicolas-tpx395.localdomain (192-222-136-102.qc.cable.ebox.net\n\t[192.222.136.102])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\tkey-exchange X25519 server-signature RSA-PSS (4096 bits))\n\t(No client certificate requested) (Authenticated sender: nicolas)\n\tby madras.collabora.co.uk (Postfix) with ESMTPSA id 791286601969;\n\tThu, 30 Jun 2022 20:32:14 +0100 (BST)"],"DKIM-Signature":["v=1; a=rsa-sha256; c=relaxed/simple; d=libcamera.org;\n\ts=mail; t=1656617536;\n\tbh=L9i4ZBfyEXPlFJV2Tna5f1eZdhaL8Nqe5MGNwX6ObJ8=;\n\th=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe:\n\tList-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc:\n\tFrom;\n\tb=VOno+6gAtyf+vujKuLkONM3HfbBnkXcVvJuMfHvO5tHRZrCGXJgH+seCzkSyKkjVV\n\tnv/j1X1anxP1wXqiyI387IuCcN8INVOfROiLjSLfHrvZHUqMNQi411/5Qq2w+2tLgw\n\t/vgQsvJtSlbySIxKwM/40aipMTTs30fsMsYe4fnTm6r1/DjZ6oaoW3kwx8FIaVC2cU\n\tpxqdkmPbP/5q30H0RCI+R8yAzIwE+9QBpBUruChcO3S/+c/hjWq0UC3P9puEG2QKfN\n\tJpV0pystPyz4k6fIV90scCohxGQa2RSxOPWemh2UAreXCreCSfFNh0iDyug1lIRAG9\n\tTk/vZgj/A6IFg==","v=1; a=rsa-sha256; c=relaxed/simple; d=collabora.com;\n\ts=mail; t=1656617535;\n\tbh=L9i4ZBfyEXPlFJV2Tna5f1eZdhaL8Nqe5MGNwX6ObJ8=;\n\th=Subject:From:To:Cc:Date:In-Reply-To:References:From;\n\tb=QHwunkcdgT/osSUSFKAKh6dJycuKzmbr9xf80pFErSai97mLWLcni2OhYRlxPEgQD\n\tiQCU4lLFWHwLsHJWNmuhiYdQAWOihK71igatY2oA8K0Yq2yPfPsU8S/++kY9f4eC7B\n\th/D6GZipos3B4/IPaH1H1+kR4oBNL1BVpZtJsJR5+jUgVhK44IZBQ7j5e6lui07y5i\n\tRjs5ys1XxkvLllLPPFFn2JHGx9QNXCTYyS0I3nqlMiju14dNBXOc5UtAt5S2HXbMSC\n\tneiJV1fN+4ZV3TZa4Ge923ypzG4Z/b4D19DSGKr2FMGB5kLk3jeTwg9ktKSrLtvOTJ\n\tl6wDzE7vop5aQ=="],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (2048-bit key; \n\tunprotected) header.d=collabora.com\n\theader.i=@collabora.com\n\theader.b=\"QHwunkcd\"; dkim-atps=neutral","Message-ID":"<00c13ee164ec9cbfad8ca0955252b914cdfbf87e.camel@collabora.com>","To":"Laurent Pinchart <laurent.pinchart@ideasonboard.com>","Date":"Thu, 30 Jun 2022 15:32:05 -0400","In-Reply-To":"<YrzlMT1Z483aLx29@pendragon.ideasonboard.com>","References":"<20220623232210.18742-1-laurent.pinchart@ideasonboard.com>\n\t<20220623232210.18742-14-laurent.pinchart@ideasonboard.com>\n\t<ede598c6fd7c24928761b65ef3848be0c072b0de.camel@collabora.com>\n\t<YrzlMT1Z483aLx29@pendragon.ideasonboard.com>","Content-Type":"text/plain; charset=\"UTF-8\"","Content-Transfer-Encoding":"quoted-printable","User-Agent":"Evolution 3.44.2 (3.44.2-1.fc36) ","MIME-Version":"1.0","Subject":"Re: [libcamera-devel] [PATCH 13/13] gstreamer: Fix race conditions\n\tin task pause/resume","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","From":"Nicolas Dufresne via libcamera-devel\n\t<libcamera-devel@lists.libcamera.org>","Reply-To":"Nicolas Dufresne <nicolas.dufresne@collabora.com>","Cc":"libcamera-devel@lists.libcamera.org,\n\tVedant Paranjape <vedantparanjape160201@gmail.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]