[{"id":38298,"web_url":"https://patchwork.libcamera.org/comment/38298/","msgid":"<85a4wxchh7.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","date":"2026-02-25T09:14:44","subject":"Re: [PATCH v3 1/4] software_isp: swstats_cpu: Prepare for\n\tmulti-threading support","submitter":{"id":177,"url":"https://patchwork.libcamera.org/api/people/177/","name":"Milan Zamazal","email":"mzamazal@redhat.com"},"content":"Hi Hans,\n\nthank you for the update.\n\nHans de Goede <johannes.goede@oss.qualcomm.com> writes:\n\n> Make the storage used to accumulate the RGB sums and the Y histogram\n> value a vector of SwIspStats objects instead of a single object so\n> that when using multi-threading every thread can use its own storage to\n> collect intermediate stats to avoid cache-line bouncing.\n>\n> Benchmarking with the GPU-ISP which does separate swstats benchmarking,\n> on the Arduino Uno-Q which has a weak CPU which is good for performance\n> testing, shows 20ms to generate stats for a 3272x2464 frame both before\n> and after this change.\n>\n> Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>\n> ---\n> Changes in v3:\n> - Use for (auto &s : stats_) {}\n>\n> Changes in v2:\n> - Move the allocation of the vector of SwIspStats objects to inside\n>   the SwStatsCpu class, controlled by a configure() arguments instead\n>   of making the caller allocate the objects\n> ---\n>  .../internal/software_isp/swstats_cpu.h       | 25 ++++-----\n>  src/libcamera/software_isp/swstats_cpu.cpp    | 54 +++++++++++++------\n>  2 files changed, 50 insertions(+), 29 deletions(-)\n>\n> diff --git a/include/libcamera/internal/software_isp/swstats_cpu.h b/include/libcamera/internal/software_isp/swstats_cpu.h\n> index 64b3e23f..feee92f9 100644\n> --- a/include/libcamera/internal/software_isp/swstats_cpu.h\n> +++ b/include/libcamera/internal/software_isp/swstats_cpu.h\n> @@ -12,6 +12,7 @@\n>  #pragma once\n>  \n>  #include <stdint.h>\n> +#include <vector>\n>  \n>  #include <libcamera/base/signal.h>\n>  \n> @@ -51,13 +52,13 @@ public:\n>  \n>  \tconst Size &patternSize() { return patternSize_; }\n>  \n> -\tint configure(const StreamConfiguration &inputCfg);\n> +\tint configure(const StreamConfiguration &inputCfg, unsigned int statsBufferCount = 1);\n>  \tvoid setWindow(const Rectangle &window);\n>  \tvoid startFrame(uint32_t frame);\n>  \tvoid finishFrame(uint32_t frame, uint32_t bufferId);\n>  \tvoid processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input);\n>  \n> -\tvoid processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])\n> +\tvoid processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>  \t{\n>  \t\tif (frame % kStatPerNumFrames)\n>  \t\t\treturn;\n> @@ -66,10 +67,10 @@ public:\n>  \t\t    y >= (window_.y + window_.height))\n>  \t\t\treturn;\n>  \n> -\t\t(this->*stats0_)(src);\n> +\t\t(this->*stats0_)(src, stats_[statsBufferIndex]);\n>  \t}\n>  \n> -\tvoid processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])\n> +\tvoid processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>  \t{\n>  \t\tif (frame % kStatPerNumFrames)\n>  \t\t\treturn;\n> @@ -78,25 +79,25 @@ public:\n>  \t\t    y >= (window_.y + window_.height))\n>  \t\t\treturn;\n>  \n> -\t\t(this->*stats2_)(src);\n> +\t\t(this->*stats2_)(src, stats_[statsBufferIndex]);\n>  \t}\n>  \n>  \tSignal<uint32_t, uint32_t> statsReady;\n>  \n>  private:\n> -\tusing statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);\n> +\tusing statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[], SwIspStats &stats);\n>  \tusing processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);\n>  \n>  \tint setupStandardBayerOrder(BayerFormat::Order order);\n>  \t/* Bayer 8 bpp unpacked */\n> -\tvoid statsBGGR8Line0(const uint8_t *src[]);\n> +\tvoid statsBGGR8Line0(const uint8_t *src[], SwIspStats &stats);\n>  \t/* Bayer 10 bpp unpacked */\n> -\tvoid statsBGGR10Line0(const uint8_t *src[]);\n> +\tvoid statsBGGR10Line0(const uint8_t *src[], SwIspStats &stats);\n>  \t/* Bayer 12 bpp unpacked */\n> -\tvoid statsBGGR12Line0(const uint8_t *src[]);\n> +\tvoid statsBGGR12Line0(const uint8_t *src[], SwIspStats &stats);\n>  \t/* Bayer 10 bpp packed */\n> -\tvoid statsBGGR10PLine0(const uint8_t *src[]);\n> -\tvoid statsGBRG10PLine0(const uint8_t *src[]);\n> +\tvoid statsBGGR10PLine0(const uint8_t *src[], SwIspStats &stats);\n> +\tvoid statsGBRG10PLine0(const uint8_t *src[], SwIspStats &stats);\n>  \n>  \tvoid processBayerFrame2(MappedFrameBuffer &in);\n>  \n> @@ -116,8 +117,8 @@ private:\n>  \tunsigned int xShift_;\n>  \tunsigned int stride_;\n>  \n> +\tstd::vector<SwIspStats> stats_;\n>  \tSharedMemObject<SwIspStats> sharedStats_;\n> -\tSwIspStats stats_;\n>  \tBenchmark bench_;\n>  };\n>  \n> diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp\n> index 1cedcfbc..41b73e43 100644\n> --- a/src/libcamera/software_isp/swstats_cpu.cpp\n> +++ b/src/libcamera/software_isp/swstats_cpu.cpp\n> @@ -74,11 +74,12 @@ namespace libcamera {\n>   */\n>  \n>  /**\n> - * \\fn void SwStatsCpu::processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])\n> + * \\fn void SwStatsCpu::processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>   * \\brief Process line 0\n>   * \\param[in] frame The frame number\n>   * \\param[in] y The y coordinate.\n>   * \\param[in] src The input data.\n> + * \\param[in] statsBufferIndex Index of stats buffer to use for multi-threading.\n>   *\n>   * This function processes line 0 for input formats with\n>   * patternSize height == 1.\n> @@ -97,14 +98,18 @@ namespace libcamera {\n>   * to the line in plane 0, etc.\n>   *\n>   * For non Bayer single plane input data only a single src pointer is required.\n> + *\n> + * The statsBufferIndex value must be less than the statsBufferCount value passed\n> + * to configure().\n>   */\n>  \n>  /**\n> - * \\fn void SwStatsCpu::processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])\n> + * \\fn void SwStatsCpu::processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)\n>   * \\brief Process line 2 and 3\n>   * \\param[in] frame The frame number\n>   * \\param[in] y The y coordinate.\n>   * \\param[in] src The input data.\n> + * \\param[in] statsBufferIndex Index of stats buffer to use for multi-threading.\n>   *\n>   * This function processes line 2 and 3 for input formats with\n>   * patternSize height == 4.\n> @@ -182,14 +187,14 @@ static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */\n>  \tyVal = r * kRedYMul;               \\\n>  \tyVal += g * kGreenYMul;            \\\n>  \tyVal += b * kBlueYMul;             \\\n> -\tstats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;\n> +\tstats.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;\n>  \n>  #define SWSTATS_FINISH_LINE_STATS() \\\n> -\tstats_.sum_.r() += sumR;    \\\n> -\tstats_.sum_.g() += sumG;    \\\n> -\tstats_.sum_.b() += sumB;\n> +\tstats.sum_.r() += sumR;    \\\n> +\tstats.sum_.g() += sumG;    \\\n> +\tstats.sum_.b() += sumB;\n>  \n> -void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint8_t *src0 = src[1] + window_.x;\n>  \tconst uint8_t *src1 = src[2] + window_.x;\n> @@ -214,7 +219,7 @@ void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint16_t *src0 = (const uint16_t *)src[1] + window_.x;\n>  \tconst uint16_t *src1 = (const uint16_t *)src[2] + window_.x;\n> @@ -240,7 +245,7 @@ void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint16_t *src0 = (const uint16_t *)src[1] + window_.x;\n>  \tconst uint16_t *src1 = (const uint16_t *)src[2] + window_.x;\n> @@ -266,7 +271,7 @@ void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])\n> +void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint8_t *src0 = src[1] + window_.x * 5 / 4;\n>  \tconst uint8_t *src1 = src[2] + window_.x * 5 / 4;\n> @@ -292,7 +297,7 @@ void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])\n>  \tSWSTATS_FINISH_LINE_STATS()\n>  }\n>  \n> -void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])\n> +void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[], SwIspStats &stats)\n>  {\n>  \tconst uint8_t *src0 = src[1] + window_.x * 5 / 4;\n>  \tconst uint8_t *src1 = src[2] + window_.x * 5 / 4;\n> @@ -332,8 +337,10 @@ void SwStatsCpu::startFrame(uint32_t frame)\n>  \tif (window_.width == 0)\n>  \t\tLOG(SwStatsCpu, Error) << \"Calling startFrame() without setWindow()\";\n>  \n> -\tstats_.sum_ = RGB<uint64_t>({ 0, 0, 0 });\n> -\tstats_.yHistogram.fill(0);\n> +\tfor (auto &s : stats_) {\n> +\t\ts.sum_ = RGB<uint64_t>({ 0, 0, 0 });\n> +\t\ts.yHistogram.fill(0);\n> +\t}\n>  }\n>  \n>  /**\n> @@ -345,8 +352,19 @@ void SwStatsCpu::startFrame(uint32_t frame)\n>   */\n>  void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)\n>  {\n> -\tstats_.valid = frame % kStatPerNumFrames == 0;\n> -\t*sharedStats_ = stats_;\n> +\tbool valid = frame % kStatPerNumFrames == 0;\n> +\n> +\tif (valid) {\n> +\t\tsharedStats_->sum_ = RGB<uint64_t>({ 0, 0, 0 });\n> +\t\tsharedStats_->yHistogram.fill(0);\n> +\t\tfor (auto &s : stats_) {\n\nIt can be `const auto' here (sorry for not mentioning it previously).\nWith that:\n\nReviewed-by: Milan Zamazal <mzamazal@redhat.com>\n\n> +\t\t\tsharedStats_->sum_ += s.sum_;\n> +\t\t\tfor (unsigned int i = 0; i < SwIspStats::kYHistogramSize; i++)\n> +\t\t\t\tsharedStats_->yHistogram[i] += s.yHistogram[i];\n> +\t\t}\n> +\t}\n> +\n> +\tsharedStats_->valid = valid;\n>  \tstatsReady.emit(frame, bufferId);\n>  }\n>  \n> @@ -389,12 +407,14 @@ int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order)\n>  /**\n>   * \\brief Configure the statistics object for the passed in input format\n>   * \\param[in] inputCfg The input format\n> + * \\param[in] statsBufferCount number of internal stats buffers to use for multi-threading\n>   *\n>   * \\return 0 on success, a negative errno value on failure\n>   */\n> -int SwStatsCpu::configure(const StreamConfiguration &inputCfg)\n> +int SwStatsCpu::configure(const StreamConfiguration &inputCfg, unsigned int statsBufferCount)\n>  {\n>  \tstride_ = inputCfg.stride;\n> +\tstats_.resize(statsBufferCount);\n>  \n>  \tBayerFormat bayerFormat =\n>  \t\tBayerFormat::fromPixelFormat(inputCfg.pixelFormat);\n> @@ -504,7 +524,7 @@ void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)\n>  \t\t/* linePointers[0] is not used by any stats0_ functions */\n>  \t\tlinePointers[1] = src;\n>  \t\tlinePointers[2] = src + stride_;\n> -\t\t(this->*stats0_)(linePointers);\n> +\t\t(this->*stats0_)(linePointers, stats_[0]);\n>  \t\tsrc += stride_ * 2;\n>  \t}\n>  }","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 7EA10C3237\n\tfor <parsemail@patchwork.libcamera.org>;\n\tWed, 25 Feb 2026 09:14:55 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 3D48E622B7;\n\tWed, 25 Feb 2026 10:14:54 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.129.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id E2407620C9\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tWed, 25 Feb 2026 10:14:51 +0100 (CET)","from mail-wr1-f71.google.com (mail-wr1-f71.google.com\n\t[209.85.221.71]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-509-svaqVoeAP2Klfy6VcEYSXg-1; Wed, 25 Feb 2026 04:14:48 -0500","by mail-wr1-f71.google.com with SMTP id\n\tffacd0b85a97d-4363333c102so4695632f8f.1\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tWed, 25 Feb 2026 01:14:48 -0800 (PST)","from mzamazal-thinkpadp1gen7.tpbc.csb\n\t(ip-77-48-47-2.net.vodafone.cz. [77.48.47.2])\n\tby smtp.gmail.com with ESMTPSA id\n\tffacd0b85a97d-43970d3ff1csm31825129f8f.20.2026.02.25.01.14.45\n\t(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n\tWed, 25 Feb 2026 01:14:45 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"MbrPshAd\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1772010890;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=NJT1EVfWW+KdKAdUdPBUlfAYSLZxAtMDRYFUh1iF/lM=;\n\tb=MbrPshAdXVFxfC24NVcOogIcXQtw7lI9iL7rLqLd2uoYdWwrSYunYtbi+czUqXrz/V/pPR\n\tfShjzyZgIIiaqcvvA0/aL8+gLu4CKqLYwkfOE5SPzr4GJCMEEnR5+phr+KoJbh6NsYUjog\n\txqDTi+CI/l9XaRr/3ecR9KvUzswbk3Y=","X-MC-Unique":"svaqVoeAP2Klfy6VcEYSXg-1","X-Mimecast-MFC-AGG-ID":"svaqVoeAP2Klfy6VcEYSXg_1772010888","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1772010887; x=1772615687;\n\th=mime-version:user-agent:message-id:date:references:in-reply-to\n\t:subject:cc:to:from:x-gm-gg:x-gm-message-state:from:to:cc:subject\n\t:date:message-id:reply-to;\n\tbh=NJT1EVfWW+KdKAdUdPBUlfAYSLZxAtMDRYFUh1iF/lM=;\n\tb=kncDM/b4Bc3Pa1tJpeiWmULcdviTUs3mqdDYMyjhujTN+QbJhxgmRNdabl+asCkdQm\n\tIZdLPSTz7bWJU1FnPLJKrvARPt66m55LY7N/i8AlX2MqS3Jgq9LNufK3Enu9Ae6PJhqo\n\t/FhTX/DesK2KV6rFYBdO85y/sA5oIicgMOMKb+bD8lxwCNzVFowYlA95o5Gij4hWs4tF\n\tH7t9s4zYZkx0JJnPVpAv0UaHG9z8G+w6e7T5KGWM2YkzdHiWhDhBQl5Oapub4559GLs8\n\t7j34vGyHNmmjKSpaSi2J+vnSKnUG9EwM8uaPeGMmq9uhoM+EE7pcqLWd9PZAAbP9WlrU\n\tFcww==","X-Gm-Message-State":"AOJu0YzmqLaNcCnPuvpIJyqe4NS/RMgZGvw/C/BLYfd/CGXyAFsdRVvZ\n\toTrG2c5+Ti3VQ/t3OgXvUf7/QWIfOlJGD8AZe+vuzZDlaPL4zo3eoZmArhSsFYp57w0f4KMqDnq\n\tVHLAXpCCrOrG/l1WrM7H9OIFt8IlDVhZc6GgMg1EiiaLqDEE2Y8LOyBdvP6N4T2YtppDsTykfcw\n\t/4kFqdmssyRnz07sfB2zYlq8pl8dKy4p4TiB49/MAKVoUPEAuuwWY36KcdhlY=","X-Gm-Gg":"ATEYQzx4DNkH0njjq+55ZERH8oaOQn0Pthq7QA8OWCQbHtSWfI5VT5eyT9DFB85fWOG\n\tsIejkmfiVxpBq3nq5FBvWgtXmRwMetuSUGpBcI5tZMh15qgNSAQVEf4GxgHJ39oCEYZ3rCI3wub\n\tgS6QdE7zb1YTDhVT54TcrCGFGjagUS0qM9o8kOzPK8xKLkCIjEgS8L2fzj/W3PSoCsHyX6qYrl+\n\tzBYcagRYMkuXaDVL2/ZUmrAxsgkSNqH330nDF8or7FmNO1GjzYH4AKk2fbNsnpJAR3k88R/YtWf\n\tC52EWhUSoT0znW31Sh6zGEEfbQqSw5fXUkJvH2ElEhzjHL9JWBWWVUANlhgujRATMjr2vmahZZ+\n\tFaNJRpf9Q7tpZrCSuLXWgighhLekpWSMTzy7NZnAHRRnfF2unHmb/+xT3OJR4iaZqbnDUh+Cyh0\n\tY=","X-Received":["by 2002:a05:6000:1843:b0:439:8a14:ba2b with SMTP id\n\tffacd0b85a97d-4398a14bc21mr8312149f8f.34.1772010887292; \n\tWed, 25 Feb 2026 01:14:47 -0800 (PST)","by 2002:a05:6000:1843:b0:439:8a14:ba2b with SMTP id\n\tffacd0b85a97d-4398a14bc21mr8312093f8f.34.1772010886551; \n\tWed, 25 Feb 2026 01:14:46 -0800 (PST)"],"From":"Milan Zamazal <mzamazal@redhat.com>","To":"Hans de Goede <johannes.goede@oss.qualcomm.com>","Cc":"libcamera-devel@lists.libcamera.org","Subject":"Re: [PATCH v3 1/4] software_isp: swstats_cpu: Prepare for\n\tmulti-threading support","In-Reply-To":"<20260224193745.106186-2-johannes.goede@oss.qualcomm.com> (Hans\n\tde Goede's message of \"Tue, 24 Feb 2026 20:37:42 +0100\")","References":"<20260224193745.106186-1-johannes.goede@oss.qualcomm.com>\n\t<20260224193745.106186-2-johannes.goede@oss.qualcomm.com>","Date":"Wed, 25 Feb 2026 10:14:44 +0100","Message-ID":"<85a4wxchh7.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","User-Agent":"Gnus/5.13 (Gnus v5.13)","MIME-Version":"1.0","X-Mimecast-Spam-Score":"0","X-Mimecast-MFC-PROC-ID":"dhZyKsUiaZViS_cg-zCZnu8Kw2owLYJ_ynLMGc7UoiA_1772010888","X-Mimecast-Originator":"redhat.com","Content-Type":"text/plain","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]