[{"id":38281,"web_url":"https://patchwork.libcamera.org/comment/38281/","msgid":"<85ldgil9sn.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","date":"2026-02-24T10:23:36","subject":"Re: [PATCH v2 1/4] software_isp: swstats_cpu: Prepare for\n\tmulti-threading support","submitter":{"id":177,"url":"https://patchwork.libcamera.org/api/people/177/","name":"Milan Zamazal","email":"mzamazal@redhat.com"},"content":"Hi Hans,\n\nthank you for the update.\n\nHans de Goede <johannes.goede@oss.qualcomm.com> writes:\n\n> Make the storage used to accumulate the RGB sums and the Y histogram\n> value a vector of SwIspStats objects instead of a single object so\n> that when using multi-threading every thread can use its own storage to\n> collect intermediate stats to avoid cache-line bouncing.\n>\n> Benchmarking with the GPU-ISP which does separate swstats benchmarking,\n> on the Arduino Uno-Q which has a weak CPU which is good for performance\n> testing, shows 20ms to generate stats for a 3272x2464 frame both before\n> and after this change.\n>\n> Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>\n> ---\n> Changes in v2:\n> - Move the allocation of the vector of SwIspStats objects to inside\n>   the SwStatsCpu class, controlled by a configure() arguments instead\n>   of making the caller allocate the objects\n> ---\n>  .../internal/software_isp/swstats_cpu.h       | 25 ++++-----\n>  src/libcamera/software_isp/swstats_cpu.cpp    | 54 +++++++++++++------\n>  2 files changed, 50 insertions(+), 29 deletions(-)\n>\n> diff --git a/include/libcamera/internal/software_isp/swstats_cpu.h b/include/libcamera/internal/software_isp/swstats_cpu.h\n> index 64b3e23f5..feee92f99 100644\n> --- a/include/libcamera/internal/software_isp/swstats_cpu.h\n> +++ b/include/libcamera/internal/software_isp/swstats_cpu.h\n> @@ -12,6 +12,7 @@\n>  #pragma once\n>  \n>  #include <stdint.h>\n> +#include <vector>\n>  \n>  #include <libcamera/base/signal.h>\n>  \n> @@ -51,13 +52,13 @@ public:\n>  \n>  \tconst Size &patternSize() { return patternSize_; }\n>  \n> -\tint configure(const StreamConfiguration &inputCfg);\n> +\tint configure(const StreamConfiguration &inputCfg, unsigned int statsBufferCount = 1);\n>  \tvoid setWindow(const Rectangle &window);\n>  \tvoid startFrame(uint32_t frame);\n>  \tvoid finishFrame(uint32_t frame, uint32_t bufferId);\n>  \tvoid processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input);\n>  \n> -\tvoid processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])\n> +\tvoid processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>  \t{\n>  \t\tif (frame % kStatPerNumFrames)\n>  \t\t\treturn;\n> @@ -66,10 +67,10 @@ public:\n>  \t\t    y >= (window_.y + window_.height))\n>  \t\t\treturn;\n>  \n> -\t\t(this->*stats0_)(src);\n> +\t\t(this->*stats0_)(src, stats_[statsBufferIndex]);\n>  \t}\n>  \n> -\tvoid processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])\n> +\tvoid processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>  \t{\n>  \t\tif (frame % kStatPerNumFrames)\n>  \t\t\treturn;\n> @@ -78,25 +79,25 @@ public:\n>  \t\t    y >= (window_.y + window_.height))\n>  \t\t\treturn;\n>  \n> -\t\t(this->*stats2_)(src);\n> +\t\t(this->*stats2_)(src, stats_[statsBufferIndex]);\n>  \t}\n>  \n>  \tSignal<uint32_t, uint32_t> statsReady;\n>  \n>  private:\n> -\tusing statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);\n> +\tusing statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[], SwIspStats &stats);\n>  \tusing processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);\n>  \n>  \tint setupStandardBayerOrder(BayerFormat::Order order);\n>  \t/* Bayer 8 bpp unpacked */\n> -\tvoid statsBGGR8Line0(const uint8_t *src[]);\n> +\tvoid statsBGGR8Line0(const uint8_t *src[], SwIspStats &stats);\n>  \t/* Bayer 10 bpp unpacked */\n> -\tvoid statsBGGR10Line0(const uint8_t *src[]);\n> +\tvoid statsBGGR10Line0(const uint8_t *src[], SwIspStats &stats);\n>  \t/* Bayer 12 bpp unpacked */\n> -\tvoid statsBGGR12Line0(const uint8_t *src[]);\n> +\tvoid statsBGGR12Line0(const uint8_t *src[], SwIspStats &stats);\n>  \t/* Bayer 10 bpp packed */\n> -\tvoid statsBGGR10PLine0(const uint8_t *src[]);\n> -\tvoid statsGBRG10PLine0(const uint8_t *src[]);\n> +\tvoid statsBGGR10PLine0(const uint8_t *src[], SwIspStats &stats);\n> +\tvoid statsGBRG10PLine0(const uint8_t *src[], SwIspStats &stats);\n>  \n>  \tvoid processBayerFrame2(MappedFrameBuffer &in);\n>  \n> @@ -116,8 +117,8 @@ private:\n>  \tunsigned int xShift_;\n>  \tunsigned int stride_;\n>  \n> +\tstd::vector<SwIspStats> stats_;\n>  \tSharedMemObject<SwIspStats> sharedStats_;\n> -\tSwIspStats stats_;\n>  \tBenchmark bench_;\n>  };\n>  \n> diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp\n> index 1cedcfbc1..7c71aed96 100644\n> --- a/src/libcamera/software_isp/swstats_cpu.cpp\n> +++ b/src/libcamera/software_isp/swstats_cpu.cpp\n> @@ -74,11 +74,12 @@ namespace libcamera {\n>   */\n>  \n>  /**\n> - * \\fn void SwStatsCpu::processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])\n> + * \\fn void SwStatsCpu::processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>   * \\brief Process line 0\n>   * \\param[in] frame The frame number\n>   * \\param[in] y The y coordinate.\n>   * \\param[in] src The input data.\n> + * \\param[in] statsBufferIndex Index of stats buffer to use for multi-threading.\n>   *\n>   * This function processes line 0 for input formats with\n>   * patternSize height == 1.\n> @@ -97,14 +98,18 @@ namespace libcamera {\n>   * to the line in plane 0, etc.\n>   *\n>   * For non Bayer single plane input data only a single src pointer is required.\n> + *\n> + * The statsBufferIndex value must be less than the statsBufferCount value passed\n> + * to configure().\n>   */\n>  \n>  /**\n> - * \\fn void SwStatsCpu::processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])\n> + * \\fn void SwStatsCpu::processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>   * \\brief Process line 2 and 3\n>   * \\param[in] frame The frame number\n>   * \\param[in] y The y coordinate.\n>   * \\param[in] src The input data.\n> + * \\param[in] statsBufferIndex Index of stats buffer to use for multi-threading.\n>   *\n>   * This function processes line 2 and 3 for input formats with\n>   * patternSize height == 4.\n> @@ -182,14 +187,14 @@ static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */\n>  \tyVal = r * kRedYMul;               \\\n>  \tyVal += g * kGreenYMul;            \\\n>  \tyVal += b * kBlueYMul;             \\\n> -\tstats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;\n> +\tstats.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;\n>  \n>  #define SWSTATS_FINISH_LINE_STATS() \\\n> -\tstats_.sum_.r() += sumR;    \\\n> -\tstats_.sum_.g() += sumG;    \\\n> -\tstats_.sum_.b() += sumB;\n> +\tstats.sum_.r() += sumR;    \\\n> +\tstats.sum_.g() += sumG;    \\\n> +\tstats.sum_.b() += sumB;\n>  \n> -void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint8_t *src0 = src[1] + window_.x;\n>  \tconst uint8_t *src1 = src[2] + window_.x;\n> @@ -214,7 +219,7 @@ void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint16_t *src0 = (const uint16_t *)src[1] + window_.x;\n>  \tconst uint16_t *src1 = (const uint16_t *)src[2] + window_.x;\n> @@ -240,7 +245,7 @@ void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint16_t *src0 = (const uint16_t *)src[1] + window_.x;\n>  \tconst uint16_t *src1 = (const uint16_t *)src[2] + window_.x;\n> @@ -266,7 +271,7 @@ void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint8_t *src0 = src[1] + window_.x * 5 / 4;\n>  \tconst uint8_t *src1 = src[2] + window_.x * 5 / 4;\n> @@ -292,7 +297,7 @@ void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])\n> +void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint8_t *src0 = src[1] + window_.x * 5 / 4;\n>  \tconst uint8_t *src1 = src[2] + window_.x * 5 / 4;\n> @@ -332,8 +337,10 @@ void SwStatsCpu::startFrame(uint32_t frame)\n>  \tif (window_.width == 0)\n>  \t\tLOG(SwStatsCpu, Error) << \"Calling startFrame() without setWindow()\";\n>  \n> -\tstats_.sum_ = RGB<uint64_t>({ 0, 0, 0 });\n> -\tstats_.yHistogram.fill(0);\n> +\tfor (unsigned int i = 0; i < stats_.size(); i++) {\n\nHow about:\n\n  for (auto &s : stats_)\n\n> +\t\tstats_[i].sum_ = RGB<uint64_t>({ 0, 0, 0 });\n> +\t\tstats_[i].yHistogram.fill(0);\n> +\t}\n>  }\n>  \n>  /**\n> @@ -345,8 +352,19 @@ void SwStatsCpu::startFrame(uint32_t frame)\n>   */\n>  void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)\n>  {\n> -\tstats_.valid = frame % kStatPerNumFrames == 0;\n> -\t*sharedStats_ = stats_;\n> +\tbool valid = frame % kStatPerNumFrames == 0;\n> +\n> +\tif (valid) {\n> +\t\tsharedStats_->sum_ = RGB<uint64_t>({ 0, 0, 0 });\n> +\t\tsharedStats_->yHistogram.fill(0);\n> +\t\tfor (unsigned int i = 0; i < stats_.size(); i++) {\n\nThe same here.\n\nOther than these, the change looks good to me.\n\n> +\t\t\tsharedStats_->sum_ += stats_[i].sum_;\n> +\t\t\tfor (unsigned int j = 0; j < SwIspStats::kYHistogramSize; j++)\n> +\t\t\t\tsharedStats_->yHistogram[j] += stats_[i].yHistogram[j];\n> +\t\t}\n> +\t}\n> +\n> +\tsharedStats_->valid = valid;\n>  \tstatsReady.emit(frame, bufferId);\n>  }\n>  \n> @@ -389,12 +407,14 @@ int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order)\n>  /**\n>   * \\brief Configure the statistics object for the passed in input format\n>   * \\param[in] inputCfg The input format\n> + * \\param[in] statsBufferCount number of internal stats buffers to use for multi-threading\n>   *\n>   * \\return 0 on success, a negative errno value on failure\n>   */\n> -int SwStatsCpu::configure(const StreamConfiguration &inputCfg)\n> +int SwStatsCpu::configure(const StreamConfiguration &inputCfg, unsigned int statsBufferCount)\n>  {\n>  \tstride_ = inputCfg.stride;\n> +\tstats_.resize(statsBufferCount);\n>  \n>  \tBayerFormat bayerFormat =\n>  \t\tBayerFormat::fromPixelFormat(inputCfg.pixelFormat);\n> @@ -504,7 +524,7 @@ void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)\n>  \t\t/* linePointers[0] is not used by any stats0_ functions */\n>  \t\tlinePointers[1] = src;\n>  \t\tlinePointers[2] = src + stride_;\n> -\t\t(this->*stats0_)(linePointers);\n> +\t\t(this->*stats0_)(linePointers, stats_[0]);\n>  \t\tsrc += stride_ * 2;\n>  \t}\n>  }","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 7919BC0DA4\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 24 Feb 2026 10:23:45 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 8455362293;\n\tTue, 24 Feb 2026 11:23:44 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.133.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id A567E620FA\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 24 Feb 2026 11:23:42 +0100 (CET)","from mail-wm1-f70.google.com (mail-wm1-f70.google.com\n\t[209.85.128.70]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-264-k4jPJZIgMOyWiRkAmMTpAQ-1; Tue, 24 Feb 2026 05:23:39 -0500","by mail-wm1-f70.google.com with SMTP id\n\t5b1f17b1804b1-483a24db6ecso56355635e9.1\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 24 Feb 2026 02:23:39 -0800 (PST)","from mzamazal-thinkpadp1gen7.tpbc.csb\n\t(ip-77-48-47-2.net.vodafone.cz. [77.48.47.2])\n\tby smtp.gmail.com with ESMTPSA id\n\t5b1f17b1804b1-483a31b3e0dsm499755395e9.1.2026.02.24.02.23.36\n\t(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n\tTue, 24 Feb 2026 02:23:36 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"fHN+F80h\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1771928621;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=ogTzyWviDAs2Fl835IQuVMfLJmvVOcgr/EI81X9dAhA=;\n\tb=fHN+F80hGZMmMEPjFJ5UeG7UWZXFSCGGHYmo5kHTmzl8l03Ve9MrP56NVqqWu8UT/YMrQg\n\t6Zp33iojvyINFxNlDhCxWD/e7B/asPe+ZRksGDbe78YEXQG+vVojkKueOVx+CxhheQJf1Y\n\tlo429EZ4OmwgEWw6Ul9fm/pJLeps+wk=","X-MC-Unique":"k4jPJZIgMOyWiRkAmMTpAQ-1","X-Mimecast-MFC-AGG-ID":"k4jPJZIgMOyWiRkAmMTpAQ_1771928618","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1771928618; x=1772533418;\n\th=mime-version:user-agent:message-id:date:references:in-reply-to\n\t:subject:cc:to:from:x-gm-gg:x-gm-message-state:from:to:cc:subject\n\t:date:message-id:reply-to;\n\tbh=ogTzyWviDAs2Fl835IQuVMfLJmvVOcgr/EI81X9dAhA=;\n\tb=Tan29D/0R3jb4iffxU9vnIsadGBFF8a+VElUjX+fnkz5G9aUm+/4bg8PNHzegzVe3g\n\tyIn9WcIlFByTM9NTrab7YJL5bPmUq698jVZNhVsbJ4AN71s52+Ax6UWQE0eFy2T2Oy16\n\t8TmCYrA9yc8bsK7zupa6XywnIOwfIvoWuFsmZLGh4dIZ15uToLYRq7FQpwXnxI3j7I0b\n\tPaTaro+Ote195WPEeuZbU3yS1PGC48nCvcLdj2Nv8S9jc00+8RHbhZpSMXtSwDln8bIk\n\tBbMEx3pxJhbW+IQ28BNuhH0Turiso3qX8hVYz4S6PBjI47q5ZQffo48g03i06qPatlrk\n\tpM/g==","X-Gm-Message-State":"AOJu0YzR0TFKU+tWH6iEEstpRrt0xCQr3BzV6T6+YkeCqBI6fHcz4bdB\n\tP/ElDu+GRk+orpEi8jPt6s9cHyXqK5pP3kpDFwavtBDQrXJziVJ92FxsmzBHlLkmTncljwo8xUE\n\tj3YIjpsM9WM6cUVzzXsmmlHUDHipvUHMvj81KpwRoL8DUK3j+3ehPduiOu/7GZcXY0B/SkGZ8KR\n\t2CSQ+tg/8w5o/BNtU57qmKLWH631Ts+knUCJGYyOjSY9wh5LUQ648f5w7USP8=","X-Gm-Gg":"AZuq6aKi0fIXv/6ZeQIIpCYv27slSXUknfvphoET/exbWiJFP9jMtsBbUvOrF/mvigB\n\tUQCIejirHLNxeyHFGwz5wpMLtYNO+KwIoQinQ35IsFBUebrxvOHrqGkMxcb3aStOEaESUsZY9cQ\n\t0CwMbjfcP8tDTjzLqm4wJ7MBoWrmD4NGG/ys+P3RlcITC3fDo7I5bp0IAxz+pwCrOtmlvMVV2Uh\n\t/+weeElihDcfMOts76PX2FVbTf/9kVQoENpOJDYuYAvxmGxOnKMaTB8wXg7ebQWhiTVYgEj0O1y\n\tsJKWLax/W6HZI2qBxxzTAXc++QJjFwJ3+SEP9OPj4Byss8OAJW3qDrm8HI05I5d3V5q4A3ylg+0\n\tryxR6STcjwsGBhaaXtdOsf/L14f0uACdwT+azyhDsSw1lCjSTnmgatlMPdDKb3cDwrbnrKYK1QO\n\ts=","X-Received":["by 2002:a05:600c:4e8a:b0:47e:e91d:73c0 with SMTP id\n\t5b1f17b1804b1-483a962e37emr182620195e9.19.1771928618036; \n\tTue, 24 Feb 2026 02:23:38 -0800 (PST)","by 2002:a05:600c:4e8a:b0:47e:e91d:73c0 with SMTP id\n\t5b1f17b1804b1-483a962e37emr182619745e9.19.1771928617467; \n\tTue, 24 Feb 2026 02:23:37 -0800 (PST)"],"From":"Milan Zamazal <mzamazal@redhat.com>","To":"Hans de Goede <johannes.goede@oss.qualcomm.com>","Cc":"libcamera-devel@lists.libcamera.org","Subject":"Re: [PATCH v2 1/4] software_isp: swstats_cpu: Prepare for\n\tmulti-threading support","In-Reply-To":"<20260223160930.27913-2-johannes.goede@oss.qualcomm.com> (Hans\n\tde Goede's message of \"Mon, 23 Feb 2026 17:09:27 +0100\")","References":"<20260223160930.27913-1-johannes.goede@oss.qualcomm.com>\n\t<20260223160930.27913-2-johannes.goede@oss.qualcomm.com>","Date":"Tue, 24 Feb 2026 11:23:36 +0100","Message-ID":"<85ldgil9sn.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","User-Agent":"Gnus/5.13 (Gnus v5.13)","MIME-Version":"1.0","X-Mimecast-Spam-Score":"0","X-Mimecast-MFC-PROC-ID":"AiNYaVYhde3x8S1U28V6sEhi_yMuwGNg2tO4OWdK3v0_1771928618","X-Mimecast-Originator":"redhat.com","Content-Type":"text/plain","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]