[{"id":29659,"web_url":"https://patchwork.libcamera.org/comment/29659/","msgid":"<171699176284.191612.14933567182627785087@ping.linuxembedded.co.uk>","date":"2024-05-29T14:09:22","subject":"Re: [PATCH v1 3/6] test: fence: Fix race condition","submitter":{"id":4,"url":"https://patchwork.libcamera.org/api/people/4/","name":"Kieran Bingham","email":"kieran.bingham@ideasonboard.com"},"content":"Quoting Laurent Pinchart (2024-04-25 00:42:21)\n> The fence test is racy, as it relies on the main loop being executed\n> between completion of request signalledRequestId_ and\n> signalledRequestId_ + 1. This usually happens, but is not guaranteed.\n> \n> To fix the race condition, change the request identification logic by\n> replacing usage of the cookie value, is zero-based and wraps around at\n\nDo you mean \"which is zero-based\"\n\n> nbuffers_ - 1, with a completed request counter that is one-based and\n> doesn't wrap. The completedRequestId_, expiredRequestId_ and\n> signalledRequestId_ variables now track the identifier of the last\n> request that has completed, the request whose fence will time out, and\n> the request whose fence will be signalled.\n> \n> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>\n> ---\n>  test/fence.cpp | 29 ++++++++++++++---------------\n>  1 file changed, 14 insertions(+), 15 deletions(-)\n> \n> diff --git a/test/fence.cpp b/test/fence.cpp\n> index e6f79d2faa21..7949bfbb176b 100644\n> --- a/test/fence.cpp\n> +++ b/test/fence.cpp\n> @@ -57,8 +57,7 @@ private:\n>         bool expectedCompletionResult_ = true;\n>         bool setFence_ = true;\n>  \n> -       unsigned int completedRequest_;\n> -\n> +       unsigned int completedRequestId_;\n>         unsigned int signalledRequestId_;\n>         unsigned int expiredRequestId_;\n>         unsigned int nbuffers_;\n> @@ -126,8 +125,9 @@ int FenceTest::init()\n>                 return TestFail;\n>         }\n>  \n> -       signalledRequestId_ = nbuffers_ - 2;\n> -       expiredRequestId_ = nbuffers_ - 1;\n> +       completedRequestId_ = 0;\n> +       expiredRequestId_ = nbuffers_;\n> +       signalledRequestId_ = nbuffers_ * 2;\n\nI don't (yet) understand hy this is nbuffers_ * 2 ... while before it\nwas nbuffers_ - 2 ... that stands out a lot ... but maybe reading\nfurther down will explain.\n\nMaybe that makes it worthy of a comment above to say why those values /\nid targets are chosen.\n\nI've read down, and the * 2 isn't clearer. Perhaps it's just to make\nsure that enough requests are queued?\n\n>  \n>         return TestPass;\n>  }\n> @@ -189,16 +189,16 @@ void FenceTest::requestRequeue(Request *request)\n>         const Request::BufferMap &buffers = request->buffers();\n>         const Stream *stream = buffers.begin()->first;\n>         FrameBuffer *buffer = buffers.begin()->second;\n> -       uint64_t cookie = request->cookie();\n>  \n>         request->reuse();\n>  \n> -       if (cookie == signalledRequestId_ && setFence_) {\n> +       if (completedRequestId_ == signalledRequestId_ - nbuffers_ && setFence_) {\n>                 /*\n> -                * The second time this request is queued add a fence to it.\n> -                *\n> -                * The main loop signals it by using a timer to write to the\n> -                * efd2_ file descriptor before the fence expires.\n> +                * This is the request that will be used to test fence\n> +                * signalling when it completes next time. Add a fence to it,\n> +                * using efd2_. The main loop will signal the fence by using a\n> +                * timer to write to the efd2_ file descriptor before the fence\n> +                * expires.\n>                  */\n>                 std::unique_ptr<Fence> fence =\n>                         std::make_unique<Fence>(std::move(eventFd2_));\n> @@ -213,8 +213,7 @@ void FenceTest::requestRequeue(Request *request)\n>  \n>  void FenceTest::requestComplete(Request *request)\n>  {\n> -       uint64_t cookie = request->cookie();\n> -       completedRequest_ = cookie;\n> +       completedRequestId_++;\n>  \n>         /*\n>          * The last request is expected to fail as its fence has not been\n> @@ -222,7 +221,7 @@ void FenceTest::requestComplete(Request *request)\n>          *\n>          * Validate the fence status but do not re-queue it.\n>          */\n> -       if (cookie == expiredRequestId_) {\n> +       if (completedRequestId_ == expiredRequestId_) {\n>                 if (validateExpiredRequest(request) != TestPass)\n>                         expectedCompletionResult_ = false;\n>  \n> @@ -271,7 +270,7 @@ int FenceTest::run()\n>                 }\n>  \n>                 int ret;\n> -               if (i == expiredRequestId_) {\n> +               if (i == expiredRequestId_ - 1) {\n>                         /* This request will have a fence, and it will expire. */\n>                         std::unique_ptr<Fence> fence =\n>                                 std::make_unique<Fence>(std::move(eventFd_));\n> @@ -318,7 +317,7 @@ int FenceTest::run()\n>         Timer timer;\n>         timer.start(1000ms);\n>         while (timer.isRunning() && expectedCompletionResult_) {\n> -               if (completedRequest_ == signalledRequestId_ && setFence_)\n> +               if (completedRequestId_ == signalledRequestId_ - 1 && setFence_)\n>                         /*\n>                          * signalledRequestId_ has just completed and it has\n>                          * been re-queued with a fence. Start the timer to\n> -- \n> Regards,\n> \n> Laurent Pinchart\n>","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id AE76BBD87C\n\tfor <parsemail@patchwork.libcamera.org>;\n\tWed, 29 May 2024 14:09:28 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id ADA3D634B7;\n\tWed, 29 May 2024 16:09:27 +0200 (CEST)","from perceval.ideasonboard.com (perceval.ideasonboard.com\n\t[IPv6:2001:4b98:dc2:55:216:3eff:fef7:d647])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 93B89634AF\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tWed, 29 May 2024 16:09:25 +0200 (CEST)","from pendragon.ideasonboard.com\n\t(cpc89244-aztw30-2-0-cust6594.18-1.cable.virginm.net [86.31.185.195])\n\tby perceval.ideasonboard.com (Postfix) with ESMTPSA id D7A47A27;\n\tWed, 29 May 2024 16:09:21 +0200 (CEST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=ideasonboard.com header.i=@ideasonboard.com\n\theader.b=\"pvvqTmyV\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/simple; d=ideasonboard.com;\n\ts=mail; t=1716991761;\n\tbh=Dr6IuPhvJYj+g1MD0fuPUiup52twi8WU7iItK66SbaU=;\n\th=In-Reply-To:References:Subject:From:To:Date:From;\n\tb=pvvqTmyVLMr4FdMwOPz2es/pMGc9F/5tp7lOvTY6c11LkSuCF830PUjpLqWeC298L\n\tk00TAXTXv8WSXhI/wju1Z/uWouLtvlUEkaGhd4+lk+04qnFT1haWYThh/5yq6yF9Bh\n\tSgjWtij0/0IQdnY8u6BQz6yxEk7NCCZksW5bUyxA=","Content-Type":"text/plain; charset=\"utf-8\"","MIME-Version":"1.0","Content-Transfer-Encoding":"quoted-printable","In-Reply-To":"<20240424234224.9658-4-laurent.pinchart@ideasonboard.com>","References":"<20240424234224.9658-1-laurent.pinchart@ideasonboard.com>\n\t<20240424234224.9658-4-laurent.pinchart@ideasonboard.com>","Subject":"Re: [PATCH v1 3/6] test: fence: Fix race condition","From":"Kieran Bingham <kieran.bingham@ideasonboard.com>","To":"Laurent Pinchart <laurent.pinchart@ideasonboard.com>,\n\tlibcamera-devel@lists.libcamera.org","Date":"Wed, 29 May 2024 15:09:22 +0100","Message-ID":"<171699176284.191612.14933567182627785087@ping.linuxembedded.co.uk>","User-Agent":"alot/0.10","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":29664,"web_url":"https://patchwork.libcamera.org/comment/29664/","msgid":"<20240529145339.GQ1436@pendragon.ideasonboard.com>","date":"2024-05-29T14:53:39","subject":"Re: [PATCH v1 3/6] test: fence: Fix race condition","submitter":{"id":2,"url":"https://patchwork.libcamera.org/api/people/2/","name":"Laurent Pinchart","email":"laurent.pinchart@ideasonboard.com"},"content":"On Wed, May 29, 2024 at 03:09:22PM +0100, Kieran Bingham wrote:\n> Quoting Laurent Pinchart (2024-04-25 00:42:21)\n> > The fence test is racy, as it relies on the main loop being executed\n> > between completion of request signalledRequestId_ and\n> > signalledRequestId_ + 1. This usually happens, but is not guaranteed.\n> > \n> > To fix the race condition, change the request identification logic by\n> > replacing usage of the cookie value, is zero-based and wraps around at\n> \n> Do you mean \"which is zero-based\"\n> \n> > nbuffers_ - 1, with a completed request counter that is one-based and\n> > doesn't wrap. The completedRequestId_, expiredRequestId_ and\n> > signalledRequestId_ variables now track the identifier of the last\n> > request that has completed, the request whose fence will time out, and\n> > the request whose fence will be signalled.\n> > \n> > Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>\n> > ---\n> >  test/fence.cpp | 29 ++++++++++++++---------------\n> >  1 file changed, 14 insertions(+), 15 deletions(-)\n> > \n> > diff --git a/test/fence.cpp b/test/fence.cpp\n> > index e6f79d2faa21..7949bfbb176b 100644\n> > --- a/test/fence.cpp\n> > +++ b/test/fence.cpp\n> > @@ -57,8 +57,7 @@ private:\n> >         bool expectedCompletionResult_ = true;\n> >         bool setFence_ = true;\n> >  \n> > -       unsigned int completedRequest_;\n> > -\n> > +       unsigned int completedRequestId_;\n> >         unsigned int signalledRequestId_;\n> >         unsigned int expiredRequestId_;\n> >         unsigned int nbuffers_;\n> > @@ -126,8 +125,9 @@ int FenceTest::init()\n> >                 return TestFail;\n> >         }\n> >  \n> > -       signalledRequestId_ = nbuffers_ - 2;\n> > -       expiredRequestId_ = nbuffers_ - 1;\n> > +       completedRequestId_ = 0;\n> > +       expiredRequestId_ = nbuffers_;\n> > +       signalledRequestId_ = nbuffers_ * 2;\n> \n> I don't (yet) understand hy this is nbuffers_ * 2 ... while before it\n> was nbuffers_ - 2 ... that stands out a lot ... but maybe reading\n> further down will explain.\n> \n> Maybe that makes it worthy of a comment above to say why those values /\n> id targets are chosen.\n> \n> I've read down, and the * 2 isn't clearer. Perhaps it's just to make\n> sure that enough requests are queued?\n\nI also had trouble with this code, so I think it's worth a comment\nindeed. I'll send a v2.\n\n> >  \n> >         return TestPass;\n> >  }\n> > @@ -189,16 +189,16 @@ void FenceTest::requestRequeue(Request *request)\n> >         const Request::BufferMap &buffers = request->buffers();\n> >         const Stream *stream = buffers.begin()->first;\n> >         FrameBuffer *buffer = buffers.begin()->second;\n> > -       uint64_t cookie = request->cookie();\n> >  \n> >         request->reuse();\n> >  \n> > -       if (cookie == signalledRequestId_ && setFence_) {\n> > +       if (completedRequestId_ == signalledRequestId_ - nbuffers_ && setFence_) {\n> >                 /*\n> > -                * The second time this request is queued add a fence to it.\n> > -                *\n> > -                * The main loop signals it by using a timer to write to the\n> > -                * efd2_ file descriptor before the fence expires.\n> > +                * This is the request that will be used to test fence\n> > +                * signalling when it completes next time. Add a fence to it,\n> > +                * using efd2_. The main loop will signal the fence by using a\n> > +                * timer to write to the efd2_ file descriptor before the fence\n> > +                * expires.\n> >                  */\n> >                 std::unique_ptr<Fence> fence =\n> >                         std::make_unique<Fence>(std::move(eventFd2_));\n> > @@ -213,8 +213,7 @@ void FenceTest::requestRequeue(Request *request)\n> >  \n> >  void FenceTest::requestComplete(Request *request)\n> >  {\n> > -       uint64_t cookie = request->cookie();\n> > -       completedRequest_ = cookie;\n> > +       completedRequestId_++;\n> >  \n> >         /*\n> >          * The last request is expected to fail as its fence has not been\n> > @@ -222,7 +221,7 @@ void FenceTest::requestComplete(Request *request)\n> >          *\n> >          * Validate the fence status but do not re-queue it.\n> >          */\n> > -       if (cookie == expiredRequestId_) {\n> > +       if (completedRequestId_ == expiredRequestId_) {\n> >                 if (validateExpiredRequest(request) != TestPass)\n> >                         expectedCompletionResult_ = false;\n> >  \n> > @@ -271,7 +270,7 @@ int FenceTest::run()\n> >                 }\n> >  \n> >                 int ret;\n> > -               if (i == expiredRequestId_) {\n> > +               if (i == expiredRequestId_ - 1) {\n> >                         /* This request will have a fence, and it will expire. */\n> >                         std::unique_ptr<Fence> fence =\n> >                                 std::make_unique<Fence>(std::move(eventFd_));\n> > @@ -318,7 +317,7 @@ int FenceTest::run()\n> >         Timer timer;\n> >         timer.start(1000ms);\n> >         while (timer.isRunning() && expectedCompletionResult_) {\n> > -               if (completedRequest_ == signalledRequestId_ && setFence_)\n> > +               if (completedRequestId_ == signalledRequestId_ - 1 && setFence_)\n> >                         /*\n> >                          * signalledRequestId_ has just completed and it has\n> >                          * been re-queued with a fence. Start the timer to","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id C4F74BD87C\n\tfor <parsemail@patchwork.libcamera.org>;\n\tWed, 29 May 2024 14:53:53 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 087D1634AF;\n\tWed, 29 May 2024 16:53:53 +0200 (CEST)","from perceval.ideasonboard.com (perceval.ideasonboard.com\n\t[IPv6:2001:4b98:dc2:55:216:3eff:fef7:d647])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id A46ED6347E\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tWed, 29 May 2024 16:53:51 +0200 (CEST)","from pendragon.ideasonboard.com (81-175-209-231.bb.dnainternet.fi\n\t[81.175.209.231])\n\tby perceval.ideasonboard.com (Postfix) with ESMTPSA id D3ED3A27;\n\tWed, 29 May 2024 16:53:47 +0200 (CEST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=ideasonboard.com header.i=@ideasonboard.com\n\theader.b=\"O/8Nzo8r\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/simple; d=ideasonboard.com;\n\ts=mail; t=1716994428;\n\tbh=Xk0tO2A0zMna/qcTIAp/SWQjoxxIfIP20pduPC1eH4c=;\n\th=Date:From:To:Cc:Subject:References:In-Reply-To:From;\n\tb=O/8Nzo8rgTM0EcD+xvBU5ZTdRK0PTxdMhiq0A3PaDM9vxuguXRpK6OXWsnVBu66AL\n\tiq4qt+JARNId3xUSgR1r6NSrKURPReM/nLUPD4ecQjCG1abBgCfGsHn8BYryALE16y\n\tngMVlRSmDKKpJzacNPr82l8uyzsaq1GpsAqS98Ds=","Date":"Wed, 29 May 2024 17:53:39 +0300","From":"Laurent Pinchart <laurent.pinchart@ideasonboard.com>","To":"Kieran Bingham <kieran.bingham@ideasonboard.com>","Cc":"libcamera-devel@lists.libcamera.org","Subject":"Re: [PATCH v1 3/6] test: fence: Fix race condition","Message-ID":"<20240529145339.GQ1436@pendragon.ideasonboard.com>","References":"<20240424234224.9658-1-laurent.pinchart@ideasonboard.com>\n\t<20240424234224.9658-4-laurent.pinchart@ideasonboard.com>\n\t<171699176284.191612.14933567182627785087@ping.linuxembedded.co.uk>","MIME-Version":"1.0","Content-Type":"text/plain; charset=utf-8","Content-Disposition":"inline","In-Reply-To":"<171699176284.191612.14933567182627785087@ping.linuxembedded.co.uk>","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]