[{"id":38277,"web_url":"https://patchwork.libcamera.org/comment/38277/","msgid":"<e5a35728-352f-41d6-9663-e5b3547dc17a@ideasonboard.com>","date":"2026-02-23T16:33:21","subject":"Re: [PATCH v2 2/4] software_isp: debayer_cpu: Add DebayerCpuThread\n\tclass","submitter":{"id":216,"url":"https://patchwork.libcamera.org/api/people/216/","name":"Barnabás Pőcze","email":"barnabas.pocze@ideasonboard.com"},"content":"Hi\n\n2026. 02. 23. 17:09 keltezéssel, Hans de Goede írta:\n> Add a DebayerCpuThreadclass and use this in the inner render loop.\n> This contains data which needs to be separate per thread.\n> \n> This is a preparation patch for making DebayerCpu support multi-threading.\n> \n> Benchmarking on the Arduino Uno-Q with a weak CPU which is good for\n> performance testing, shows 146-147ms per 3272x2464 frame both before and\n> after this change, with things maybe being 0.5 ms slower after this change.\n> \n> Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>\n> ---\n> Changes in v2:\n> - Replace the DebayerCpuThreadData struct from v1 with a DebayerCpuThread\n>    class, derived from Object to allow calling invokeMethod for thread re-use\n>    in followup patches\n> - As part of this also move a bunch of methods which primarily deal with\n>    per thread data: setupInputMemcpy(), shiftLinePointers(), memcpyNextLine(),\n>    process*() to the new DebayerCpuThread class\n> ---\n>   src/libcamera/software_isp/debayer_cpu.cpp | 215 ++++++++++++++-------\n>   src/libcamera/software_isp/debayer_cpu.h   |  20 +-\n>   2 files changed, 159 insertions(+), 76 deletions(-)\n> \n> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n> index e7b012105..122bfbb05 100644\n> --- a/src/libcamera/software_isp/debayer_cpu.cpp\n> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n> @@ -27,6 +27,38 @@\n> [...]\n> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n> index 7a6517462..7196dcdd0 100644\n> --- a/src/libcamera/software_isp/debayer_cpu.h\n> +++ b/src/libcamera/software_isp/debayer_cpu.h\n> @@ -26,6 +26,7 @@\n>   \n>   namespace libcamera {\n>   \n> +class DebayerCpuThread;\n>   class DebayerCpu : public Debayer\n>   {\n>   public:\n> @@ -44,6 +45,8 @@ public:\n>   \tconst SharedFD &getStatsFD() { return stats_->getStatsFD(); }\n>   \n>   private:\n> +\tfriend class DebayerCpuThread;\n> +\n>   \t/**\n>   \t * \\brief Called to debayer 1 line of Bayer input data to output format\n>   \t * \\param[out] dst Pointer to the start of the output line to write\n> @@ -74,6 +77,11 @@ private:\n>   \t */\n>   \tusing debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);\n>   \n> +\tvoid debayer0(uint8_t *dst, const uint8_t *src[]) { (this->*debayer0_)(dst, src); }\n> +\tvoid debayer1(uint8_t *dst, const uint8_t *src[]) { (this->*debayer1_)(dst, src); }\n> +\tvoid debayer2(uint8_t *dst, const uint8_t *src[]) { (this->*debayer2_)(dst, src); }\n> +\tvoid debayer3(uint8_t *dst, const uint8_t *src[]) { (this->*debayer3_)(dst, src); }\n> +\n>   \t/* 8-bit raw bayer format */\n>   \ttemplate<bool addAlphaByte, bool ccmEnabled>\n>   \tvoid debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> @@ -105,11 +113,6 @@ private:\n>   \tint setDebayerFunctions(PixelFormat inputFormat,\n>   \t\t\t\tPixelFormat outputFormat,\n>   \t\t\t\tbool ccmEnabled);\n> -\tvoid setupInputMemcpy(const uint8_t *linePointers[]);\n> -\tvoid shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n> -\tvoid memcpyNextLine(const uint8_t *linePointers[]);\n> -\tvoid process2(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> -\tvoid process4(uint32_t frame, const uint8_t *src, uint8_t *dst);\n>   \tvoid updateGammaTable(const DebayerParams &params);\n>   \tvoid updateLookupTables(const DebayerParams &params);\n>   \n> @@ -142,12 +145,9 @@ private:\n>   \tdebayerFn debayer3_;\n>   \tRectangle window_;\n>   \tstd::unique_ptr<SwStatsCpu> stats_;\n> -\tstd::vector<uint8_t> lineBuffers_[kMaxLineBuffers];\n> -\tunsigned int lineBufferLength_;\n> -\tunsigned int lineBufferPadding_;\n> -\tunsigned int lineBufferIndex_;\n>   \tunsigned int xShift_; /* Offset of 0/1 applied to window_.x */\n> -\tbool enableInputMemcpy_;\n> +\n> +\tstd::vector<DebayerCpuThread *>threads_;\n\nThis should be `std::unique_ptr<>` or similar.\n\n\n>   };\n>   \n>   } /* namespace libcamera */","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 28CA7C0DA4\n\tfor <parsemail@patchwork.libcamera.org>;\n\tMon, 23 Feb 2026 16:33:27 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 652CC6228C;\n\tMon, 23 Feb 2026 17:33:26 +0100 (CET)","from perceval.ideasonboard.com (perceval.ideasonboard.com\n\t[IPv6:2001:4b98:dc2:55:216:3eff:fef7:d647])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id C7A7A621CE\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tMon, 23 Feb 2026 17:33:24 +0100 (CET)","from [192.168.33.88] (185.221.141.206.nat.pool.zt.hu\n\t[185.221.141.206])\n\tby perceval.ideasonboard.com (Postfix) with ESMTPSA id 9DF254F1;\n\tMon, 23 Feb 2026 17:32:28 +0100 (CET)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=ideasonboard.com header.i=@ideasonboard.com\n\theader.b=\"eKS238Y4\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/simple; d=ideasonboard.com;\n\ts=mail; t=1771864348;\n\tbh=n2yzmzS9ZFgIc09kNX2TbnRjvuONiT5HHSpUuUmueT8=;\n\th=Date:Subject:To:Cc:References:From:In-Reply-To:From;\n\tb=eKS238Y4Rs1eCL7JjKc5Gtyj/wHLxUNP0ysiTEJ82cgUQgyOksLAHDMXL1LrMwbyM\n\tteh4duTlBSRAVkeLQvQtJvwBM2nnfiLg173zlX6bqI+YZyAcpnznuMnBWjL/DDgb/J\n\tSuBhOTq0IsyDXWHbFrMSrYBYq1Vu4AbBb9ump0/Q=","Message-ID":"<e5a35728-352f-41d6-9663-e5b3547dc17a@ideasonboard.com>","Date":"Mon, 23 Feb 2026 17:33:21 +0100","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","Subject":"Re: [PATCH v2 2/4] software_isp: debayer_cpu: Add DebayerCpuThread\n\tclass","To":"Hans de Goede <johannes.goede@oss.qualcomm.com>,\n\tlibcamera-devel@lists.libcamera.org","Cc":"Milan Zamazal <mzamazal@redhat.com>","References":"<20260223160930.27913-1-johannes.goede@oss.qualcomm.com>\n\t<20260223160930.27913-3-johannes.goede@oss.qualcomm.com>","From":"=?utf-8?q?Barnab=C3=A1s_P=C5=91cze?= <barnabas.pocze@ideasonboard.com>","Content-Language":"en-US, hu-HU","In-Reply-To":"<20260223160930.27913-3-johannes.goede@oss.qualcomm.com>","Content-Type":"text/plain; charset=UTF-8; format=flowed","Content-Transfer-Encoding":"8bit","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":38280,"web_url":"https://patchwork.libcamera.org/comment/38280/","msgid":"<85qzqalad2.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","date":"2026-02-24T10:11:21","subject":"Re: [PATCH v2 2/4] software_isp: debayer_cpu: Add DebayerCpuThread\n\tclass","submitter":{"id":177,"url":"https://patchwork.libcamera.org/api/people/177/","name":"Milan Zamazal","email":"mzamazal@redhat.com"},"content":"Hi Hans,\n\nthank you for the update.\n\nHans de Goede <johannes.goede@oss.qualcomm.com> writes:\n\n> Add a DebayerCpuThreadclass and use this in the inner render loop.\n> This contains data which needs to be separate per thread.\n>\n> This is a preparation patch for making DebayerCpu support multi-threading.\n>\n> Benchmarking on the Arduino Uno-Q with a weak CPU which is good for\n> performance testing, shows 146-147ms per 3272x2464 frame both before and\n> after this change, with things maybe being 0.5 ms slower after this change.\n>\n> Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>\n> ---\n> Changes in v2:\n> - Replace the DebayerCpuThreadData struct from v1 with a DebayerCpuThread\n>   class, derived from Object to allow calling invokeMethod for thread re-use\n>   in followup patches\n> - As part of this also move a bunch of methods which primarily deal with\n>   per thread data: setupInputMemcpy(), shiftLinePointers(), memcpyNextLine(),\n>   process*() to the new DebayerCpuThread class\n> ---\n>  src/libcamera/software_isp/debayer_cpu.cpp | 215 ++++++++++++++-------\n>  src/libcamera/software_isp/debayer_cpu.h   |  20 +-\n>  2 files changed, 159 insertions(+), 76 deletions(-)\n>\n> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n> index e7b012105..122bfbb05 100644\n> --- a/src/libcamera/software_isp/debayer_cpu.cpp\n> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n> @@ -27,6 +27,38 @@\n>  \n>  namespace libcamera {\n>  \n> +class DebayerCpuThread : public Object\n\nBuilding it complains about this class and its members not documented.\n\n> +{\n> +public:\n> +\tDebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex,\n> +\t\t\t bool enableInputMemcpy);\n> +\n> +\tvoid configure(unsigned int yStart, unsigned int yEnd);\n> +\tvoid setupInputMemcpy(const uint8_t *linePointers[]);\n> +\tvoid shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n> +\tvoid memcpyNextLine(const uint8_t *linePointers[]);\n> +\tvoid process(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> +\tvoid process2(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> +\tvoid process4(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> +\n> +\tDebayerCpu *debayer_;\n> +\tunsigned int threadIndex_;\n> +\tunsigned int yStart_;\n> +\tunsigned int yEnd_;\n> +\tunsigned int lineBufferLength_;\n> +\tunsigned int lineBufferPadding_;\n> +\tunsigned int lineBufferIndex_;\n> +\tstd::vector<uint8_t> lineBuffers_[DebayerCpu::kMaxLineBuffers];\n> +\tbool enableInputMemcpy_;\n> +};\n> +\n> +DebayerCpuThread::DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex,\n> +\t\t\t\t   bool enableInputMemcpy)\n> +\t: debayer_(debayer), threadIndex_(threadIndex),\n> +\t  enableInputMemcpy_(enableInputMemcpy)\n> +{\n> +}\n> +\n>  /**\n>   * \\class DebayerCpu\n>   * \\brief Class for debayering on the CPU\n> @@ -53,8 +85,14 @@ DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats, const GlobalConfigurat\n>  \t * \\todo Make memcpy automatic based on runtime detection of platform\n>  \t * capabilities.\n>  \t */\n> -\tenableInputMemcpy_ =\n> +\tbool enableInputMemcpy =\n>  \t\tconfiguration.option<bool>({ \"software_isp\", \"copy_input_buffer\" }).value_or(true);\n> +\n> +\t/* Just one thread object for now, which will be called inline rather than async */\n> +\tthreads_.resize(1);\n> +\n> +\tfor (unsigned int i = 0; i < threads_.size(); i++)\n> +\t\tthreads_[i] = new DebayerCpuThread(this, i, enableInputMemcpy);\n>  }\n>  \n>  DebayerCpu::~DebayerCpu() = default;\n> @@ -484,7 +522,7 @@ int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n>  \tif (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)\n>  \t\treturn -EINVAL;\n>  \n> -\tif (stats_->configure(inputCfg) != 0)\n> +\tif (stats_->configure(inputCfg, threads_.size()) != 0)\n>  \t\treturn -EINVAL;\n>  \n>  \tconst Size &statsPatternSize = stats_->patternSize();\n> @@ -548,17 +586,36 @@ int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n>  \t */\n>  \tstats_->setWindow(Rectangle(window_.size()));\n>  \n> +\tunsigned int yStart = 0;\n> +\tunsigned int linesPerThread = (window_.height / threads_.size()) &\n> +\t\t\t\t      ~(inputConfig_.patternSize.height - 1);\n> +\tunsigned int i;\n> +\n> +\tfor (i = 0; i < (threads_.size() - 1); i++) {\n> +\t\tthreads_[i]->configure(yStart, yStart + linesPerThread);\n> +\t\tyStart += linesPerThread;\n> +\t}\n> +\tthreads_[i]->configure(yStart, window_.height);\n> +\n> +\treturn 0;\n> +}\n> +\n> +void DebayerCpuThread::configure(unsigned int yStart, unsigned int yEnd)\n> +{\n> +\tDebayer::DebayerInputConfig &inputConfig = debayer_->inputConfig_;\n> +\n> +\tyStart_ = yStart;\n> +\tyEnd_ = yEnd;\n> +\n>  \t/* pad with patternSize.Width on both left and right side */\n> -\tlineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8;\n> -\tlineBufferLength_ = window_.width * inputConfig_.bpp / 8 +\n> +\tlineBufferPadding_ = inputConfig.patternSize.width * inputConfig.bpp / 8;\n> +\tlineBufferLength_ = debayer_->window_.width * inputConfig.bpp / 8 +\n>  \t\t\t    2 * lineBufferPadding_;\n>  \n>  \tif (enableInputMemcpy_) {\n> -\t\tfor (unsigned int i = 0; i <= inputConfig_.patternSize.height; i++)\n> +\t\tfor (unsigned int i = 0; i <= inputConfig.patternSize.height; i++)\n>  \t\t\tlineBuffers_[i].resize(lineBufferLength_);\n>  \t}\n> -\n> -\treturn 0;\n>  }\n>  \n>  /*\n> @@ -599,9 +656,9 @@ DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size\n>  \treturn std::make_tuple(stride, stride * size.height);\n>  }\n>  \n> -void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n> +void DebayerCpuThread::setupInputMemcpy(const uint8_t *linePointers[])\n>  {\n> -\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\tconst unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;\n>  \n>  \tif (!enableInputMemcpy_)\n>  \t\treturn;\n> @@ -617,20 +674,20 @@ void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n>  \tlineBufferIndex_ = patternHeight;\n>  }\n>  \n> -void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)\n> +void DebayerCpuThread::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)\n>  {\n> -\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\tconst unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;\n>  \n>  \tfor (unsigned int i = 0; i < patternHeight; i++)\n>  \t\tlinePointers[i] = linePointers[i + 1];\n>  \n> -\tlinePointers[patternHeight] = src +\n> -\t\t\t\t      (patternHeight / 2) * (int)inputConfig_.stride;\n> +\tlinePointers[patternHeight] =\n> +\t\tsrc + (patternHeight / 2) * (int)debayer_->inputConfig_.stride;\n>  }\n>  \n> -void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n> +void DebayerCpuThread::memcpyNextLine(const uint8_t *linePointers[])\n>  {\n> -\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\tconst unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;\n>  \n>  \tif (!enableInputMemcpy_)\n>  \t\treturn;\n> @@ -643,23 +700,42 @@ void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n>  \tlineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);\n>  }\n>  \n> -void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)\n> +void DebayerCpuThread::process(uint32_t frame, const uint8_t *src, uint8_t *dst)\n>  {\n> -\tunsigned int yEnd = window_.height;\n> +\tRectangle &window = debayer_->window_;\n> +\n> +\t/* Adjust src to top left corner of the window */\n> +\tsrc += (window.y + yStart_) * debayer_->inputConfig_.stride +\n> +\t       window.x * debayer_->inputConfig_.bpp / 8;\n> +\t/* Adjust dst for yStart_ */\n> +\tdst += yStart_ * debayer_->outputConfig_.stride;\n> +\n> +\tif (debayer_->inputConfig_.patternSize.height == 2)\n> +\t\tprocess2(frame, src, dst);\n> +\telse\n> +\t\tprocess4(frame, src, dst);\n> +}\n> +\n> +void DebayerCpuThread::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)\n> +{\n> +\tunsigned int outputStride = debayer_->outputConfig_.stride;\n> +\tunsigned int inputStride = debayer_->inputConfig_.stride;\n> +\tRectangle &window = debayer_->window_;\n> +\tunsigned int yEnd = yEnd_;\n>  \t/* Holds [0] previous- [1] current- [2] next-line */\n>  \tconst uint8_t *linePointers[3];\n>  \n> -\t/* Adjust src to top left corner of the window */\n> -\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> -\n>  \t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> -\tif (window_.y) {\n> -\t\tlinePointers[1] = src - inputConfig_.stride; /* previous-line */\n> +\tif (window.y + yStart_) {\n> +\t\tlinePointers[1] = src - inputStride; /* previous-line */\n>  \t\tlinePointers[2] = src;\n>  \t} else {\n> -\t\t/* window_.y == 0, use the next line as prev line */\n> -\t\tlinePointers[1] = src + inputConfig_.stride;\n> +\t\t/* Top line, use the next line as prev line */\n> +\t\tlinePointers[1] = src + inputStride;\n>  \t\tlinePointers[2] = src;\n> +\t}\n> +\n> +\tif (window.y == 0 && yEnd_ == window.height) {\n>  \t\t/*\n>  \t\t * Last 2 lines also need special handling.\n>  \t\t * (And configure() ensures that yEnd >= 2.)\n> @@ -669,83 +745,93 @@ void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)\n>  \n>  \tsetupInputMemcpy(linePointers);\n>  \n> -\tfor (unsigned int y = 0; y < yEnd; y += 2) {\n> +\t/*\n> +\t * Note y is the line-number *inside* the window, since stats_' window\n> +\t * is the stats window inside/relative to the debayer window. IOW for\n> +\t * single thread rendering y goes from 0 to window.height.\n> +\t */\n> +\tfor (unsigned int y = yStart_; y < yEnd; y += 2) {\n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\tstats_->processLine0(frame, y, linePointers);\n> -\t\t(this->*debayer0_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->stats_->processLine0(frame, y, linePointers, threadIndex_);\n> +\t\tdebayer_->debayer0(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\t(this->*debayer1_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->debayer1(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \t}\n>  \n> -\tif (window_.y == 0) {\n> +\tif (window.y == 0 && yEnd_ == window.height) {\n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\tstats_->processLine0(frame, yEnd, linePointers);\n> -\t\t(this->*debayer0_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->stats_->processLine0(frame, yEnd, linePointers, threadIndex_);\n> +\t\tdebayer_->debayer0(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\t/* next line may point outside of src, use prev. */\n>  \t\tlinePointers[2] = linePointers[0];\n> -\t\t(this->*debayer1_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->debayer1(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \t}\n>  }\n>  \n> -void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)\n> +void DebayerCpuThread::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)\n>  {\n> +\tunsigned int outputStride = debayer_->outputConfig_.stride;\n> +\tunsigned int inputStride = debayer_->inputConfig_.stride;\n> +\n>  \t/*\n>  \t * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line\n>  \t * [3] 1-line-down [4] 2-lines-down.\n>  \t */\n>  \tconst uint8_t *linePointers[5];\n>  \n> -\t/* Adjust src to top left corner of the window */\n> -\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> -\n>  \t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> -\tlinePointers[1] = src - 2 * inputConfig_.stride;\n> -\tlinePointers[2] = src - inputConfig_.stride;\n> +\tlinePointers[1] = src - 2 * inputStride;\n> +\tlinePointers[2] = src - inputStride;\n>  \tlinePointers[3] = src;\n> -\tlinePointers[4] = src + inputConfig_.stride;\n> +\tlinePointers[4] = src + inputStride;\n>  \n>  \tsetupInputMemcpy(linePointers);\n>  \n> -\tfor (unsigned int y = 0; y < window_.height; y += 4) {\n> +\t/*\n> +\t * Note y is the line-number *inside* the window, since stats_' window\n> +\t * is the stats window inside/relative to the debayer window. IOW for\n> +\t * single thread rendering y goes from 0 to window.height.\n> +\t */\n> +\tfor (unsigned int y = yStart_; y < yEnd_; y += 4) {\n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\tstats_->processLine0(frame, y, linePointers);\n> -\t\t(this->*debayer0_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->stats_->processLine0(frame, y, linePointers, threadIndex_);\n> +\t\tdebayer_->debayer0(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\t(this->*debayer1_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->debayer1(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\tstats_->processLine2(frame, y, linePointers);\n> -\t\t(this->*debayer2_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->stats_->processLine2(frame, y, linePointers, threadIndex_);\n> +\t\tdebayer_->debayer2(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n>  \t\tmemcpyNextLine(linePointers);\n> -\t\t(this->*debayer3_)(dst, linePointers);\n> -\t\tsrc += inputConfig_.stride;\n> -\t\tdst += outputConfig_.stride;\n> +\t\tdebayer_->debayer3(dst, linePointers);\n> +\t\tsrc += inputStride;\n> +\t\tdst += outputStride;\n>  \t}\n>  }\n>  \n> @@ -867,10 +953,7 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output\n>  \n>  \tstats_->startFrame(frame);\n>  \n> -\tif (inputConfig_.patternSize.height == 2)\n> -\t\tprocess2(frame, in.planes()[0].data(), out.planes()[0].data());\n> -\telse\n> -\t\tprocess4(frame, in.planes()[0].data(), out.planes()[0].data());\n> +\tthreads_[0]->process(frame, in.planes()[0].data(), out.planes()[0].data());\n>  \n>  \tmetadata.planes()[0].bytesused = out.planes()[0].size();\n>  \n> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n> index 7a6517462..7196dcdd0 100644\n> --- a/src/libcamera/software_isp/debayer_cpu.h\n> +++ b/src/libcamera/software_isp/debayer_cpu.h\n> @@ -26,6 +26,7 @@\n>  \n>  namespace libcamera {\n>  \n> +class DebayerCpuThread;\n>  class DebayerCpu : public Debayer\n>  {\n>  public:\n> @@ -44,6 +45,8 @@ public:\n>  \tconst SharedFD &getStatsFD() { return stats_->getStatsFD(); }\n>  \n>  private:\n> +\tfriend class DebayerCpuThread;\n> +\n>  \t/**\n>  \t * \\brief Called to debayer 1 line of Bayer input data to output format\n>  \t * \\param[out] dst Pointer to the start of the output line to write\n> @@ -74,6 +77,11 @@ private:\n>  \t */\n>  \tusing debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);\n>  \n> +\tvoid debayer0(uint8_t *dst, const uint8_t *src[]) { (this->*debayer0_)(dst, src); }\n> +\tvoid debayer1(uint8_t *dst, const uint8_t *src[]) { (this->*debayer1_)(dst, src); }\n> +\tvoid debayer2(uint8_t *dst, const uint8_t *src[]) { (this->*debayer2_)(dst, src); }\n> +\tvoid debayer3(uint8_t *dst, const uint8_t *src[]) { (this->*debayer3_)(dst, src); }\n> +\n>  \t/* 8-bit raw bayer format */\n>  \ttemplate<bool addAlphaByte, bool ccmEnabled>\n>  \tvoid debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> @@ -105,11 +113,6 @@ private:\n>  \tint setDebayerFunctions(PixelFormat inputFormat,\n>  \t\t\t\tPixelFormat outputFormat,\n>  \t\t\t\tbool ccmEnabled);\n> -\tvoid setupInputMemcpy(const uint8_t *linePointers[]);\n> -\tvoid shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n> -\tvoid memcpyNextLine(const uint8_t *linePointers[]);\n> -\tvoid process2(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> -\tvoid process4(uint32_t frame, const uint8_t *src, uint8_t *dst);\n>  \tvoid updateGammaTable(const DebayerParams &params);\n>  \tvoid updateLookupTables(const DebayerParams &params);\n>  \n> @@ -142,12 +145,9 @@ private:\n>  \tdebayerFn debayer3_;\n>  \tRectangle window_;\n>  \tstd::unique_ptr<SwStatsCpu> stats_;\n> -\tstd::vector<uint8_t> lineBuffers_[kMaxLineBuffers];\n> -\tunsigned int lineBufferLength_;\n> -\tunsigned int lineBufferPadding_;\n> -\tunsigned int lineBufferIndex_;\n>  \tunsigned int xShift_; /* Offset of 0/1 applied to window_.x */\n> -\tbool enableInputMemcpy_;\n> +\n> +\tstd::vector<DebayerCpuThread *>threads_;\n>  };\n>  \n>  } /* namespace libcamera */","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id B55B3BE175\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 24 Feb 2026 10:11:32 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 5958B6229B;\n\tTue, 24 Feb 2026 11:11:31 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.133.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 19C44620FA\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 24 Feb 2026 11:11:28 +0100 (CET)","from mail-wm1-f71.google.com (mail-wm1-f71.google.com\n\t[209.85.128.71]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-142-3Q3pvYRjPcCjeg6fSEflDQ-1; Tue, 24 Feb 2026 05:11:25 -0500","by mail-wm1-f71.google.com with SMTP id\n\t5b1f17b1804b1-483a2db68caso41773085e9.0\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 24 Feb 2026 02:11:25 -0800 (PST)","from mzamazal-thinkpadp1gen7.tpbc.csb\n\t(ip-77-48-47-2.net.vodafone.cz. [77.48.47.2])\n\tby smtp.gmail.com with ESMTPSA id\n\tffacd0b85a97d-43970c00d95sm25266746f8f.13.2026.02.24.02.11.22\n\t(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n\tTue, 24 Feb 2026 02:11:22 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"CQcqUGCd\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1771927887;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=MmGwCZqYcHKfxDrDLeaIArDYvPvHVg4dcbuRY81FvcY=;\n\tb=CQcqUGCd98uYSoPNY/7lT40Fv3rOP4Owyo+8obiKDNQnLQ8KrhNjhs5hr/to05fy+VH2nc\n\tRpSo2Xvo87azgYUctN+aJq34gPXG96VRHOXDn3IPekEWz705meHiS7stpD5kVIcHfWhQJN\n\tN7GprA+5DIFJ7ytla3mZ9kAgio/RW80=","X-MC-Unique":"3Q3pvYRjPcCjeg6fSEflDQ-1","X-Mimecast-MFC-AGG-ID":"3Q3pvYRjPcCjeg6fSEflDQ_1771927885","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1771927884; x=1772532684;\n\th=mime-version:user-agent:message-id:date:references:in-reply-to\n\t:subject:cc:to:from:x-gm-gg:x-gm-message-state:from:to:cc:subject\n\t:date:message-id:reply-to;\n\tbh=MmGwCZqYcHKfxDrDLeaIArDYvPvHVg4dcbuRY81FvcY=;\n\tb=X0iVRyNrVEQxmCvx+BqPSiW44fSx7Vg9Q04G5fVEUAS/EIfM0+6i1M/TcuX/YdpWCs\n\tLnb2IXsxAyeC+upVaj6goUq2BDPuz5E4j0ixqydAKLjbUfIgnx2G0+X3STQv2GX+6ieP\n\t5/weeYAhzOOZ2qc6OxaGh5rJJYCMQc7cVUX/g11w0ITgGUn5y5W9qd+foB/TxQF0RV/I\n\tzjBpGgKnnjQOYYSwLDy/j3hYTEAoC8MvNGB34afWxLG9+sy0DPWvhZWEuOazoiRwAVJX\n\tRecgcx2C0PMn7Nz5xMWL8fBTZNeL6v+U0bS5kvTD4OnDt4usx7ryIyqEhEriLpA0naQQ\n\tfJiA==","X-Gm-Message-State":"AOJu0Yz8xxvW1oC4bFG07ookPoht1HCsXiJze4BI55sViwOVRC7Xfw+1\n\trTC1Ya3t1C+bh4oHrCVPyM9RRb91TlBvCU8wlTLpwsi+9xKl7VfEaqtW3erak/NNV4wmyXWOMa9\n\t7XPkreuUIdz1XRuHNvi710U19E2HGagcoIf7bqudDebjvlTeJdn+4REhx4agtzySQFBZAsLm8JR\n\tNohL8FALQ1UPVgNnR740qJ3E195qjNk5OHihahUKJUWrwddgYILeQGVWxELw4=","X-Gm-Gg":"AZuq6aI5/AEBynMbjMj7TY59uLxR5RGz7ymwFltjx1no0e3hjPuVaftLdB1Hzry2QoZ\n\th2j6oM4sv+Uq8lzV9KUygG6mC37EqBmsPklbyx2ohc9IPBd0/NpD+9X+kLH5n1IljES6vPosqMk\n\tvVTgZXji2tuuYF0O7XfL9PJnMKZ4N37to1iQzblUY76+8ZyxOGbr7kGVx2wx4b1CwYEl0rZzITN\n\tQ/iDm7RAkprfjpCp5zJdbWNIKxzrPLLoFNLHfVyQgMnuOjloWqA0HJQOBMMaXYOtoi7xr+HWBpY\n\tgTF4eEQJMBa+IL7jJZuZau7edwQEAtkcsN0w1eggQs3DoZPA+bVuXicsbETBEjloRXC/rgv2mw6\n\tRFN3wHOT62afcv7vjpa4zpmqDKdMxgWJt5UFNiAlwuVjS4npIvBkDQYbMvBYIWeWsXJ+4RcjGwm\n\t4=","X-Received":["by 2002:a05:600c:3b18:b0:483:64b4:79da with SMTP id\n\t5b1f17b1804b1-483a95e6b80mr172712325e9.26.1771927884184; \n\tTue, 24 Feb 2026 02:11:24 -0800 (PST)","by 2002:a05:600c:3b18:b0:483:64b4:79da with SMTP id\n\t5b1f17b1804b1-483a95e6b80mr172711645e9.26.1771927883405; \n\tTue, 24 Feb 2026 02:11:23 -0800 (PST)"],"From":"Milan Zamazal <mzamazal@redhat.com>","To":"Hans de Goede <johannes.goede@oss.qualcomm.com>","Cc":"libcamera-devel@lists.libcamera.org","Subject":"Re: [PATCH v2 2/4] software_isp: debayer_cpu: Add DebayerCpuThread\n\tclass","In-Reply-To":"<20260223160930.27913-3-johannes.goede@oss.qualcomm.com> (Hans\n\tde Goede's message of \"Mon, 23 Feb 2026 17:09:28 +0100\")","References":"<20260223160930.27913-1-johannes.goede@oss.qualcomm.com>\n\t<20260223160930.27913-3-johannes.goede@oss.qualcomm.com>","Date":"Tue, 24 Feb 2026 11:11:21 +0100","Message-ID":"<85qzqalad2.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","User-Agent":"Gnus/5.13 (Gnus v5.13)","MIME-Version":"1.0","X-Mimecast-Spam-Score":"0","X-Mimecast-MFC-PROC-ID":"nsd1j9YlozjwleBFn8LQyD5zEXczbaJ7WzjbIdjFqew_1771927885","X-Mimecast-Originator":"redhat.com","Content-Type":"text/plain","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]