[{"id":38216,"web_url":"https://patchwork.libcamera.org/comment/38216/","msgid":"<85cy23awvc.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","date":"2026-02-17T21:22:31","subject":"Re: [PATCH 2/5] software_isp: debayer_cpu: Add per render thread\n\tdata","submitter":{"id":177,"url":"https://patchwork.libcamera.org/api/people/177/","name":"Milan Zamazal","email":"mzamazal@redhat.com"},"content":"Hi Hans,\n\nthank you for the patch.\n\nLooks basically correct to me, some comments below.\n\nHans de Goede <johannes.goede@oss.qualcomm.com> writes:\n\n> Add a DebayerCpuThreadData data struct and use this in the inner render\n> loop. This contains data which needs to be separate per thread.\n>\n> This is a preparation patch for making DebayerCpu support multi-threading.\n>\n> Note this passed the DebayerCpuThreadData with a pointer rather then by\n\ns/then/than/\n\n> reference, because passing by reference is not supported for functions\n> passed as the thread function to std::thread().\n>\n> Benchmarking on the Uno-Q with a weak CPU which is good for performance\n> testing, shows 146-147ms per 3272x2464 frame both before and after this\n> change, with things maybe being 0.5 ms slower after this change.\n>\n> Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>\n> ---\n>  src/libcamera/software_isp/debayer_cpu.cpp | 90 ++++++++++++++--------\n>  src/libcamera/software_isp/debayer_cpu.h   | 30 +++++---\n>  2 files changed, 77 insertions(+), 43 deletions(-)\n>\n> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n> index 97c1959a..e1d3c164 100644\n> --- a/src/libcamera/software_isp/debayer_cpu.cpp\n> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n> @@ -41,7 +41,7 @@ namespace libcamera {\n>   * \\param[in] configuration The global configuration\n>   */\n>  DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats, const GlobalConfiguration &configuration)\n> -\t: Debayer(configuration), stats_(std::move(stats))\n> +\t: Debayer(configuration), stats_(std::move(stats)), threadCount_(1)\n>  {\n>  \t/*\n>  \t * Reading from uncached buffers may be very slow.\n> @@ -555,8 +555,9 @@ int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n>  \t\t\t    2 * lineBufferPadding_;\n>  \n>  \tif (enableInputMemcpy_) {\n> -\t\tfor (unsigned int i = 0; i <= inputConfig_.patternSize.height; i++)\n> -\t\t\tlineBuffers_[i].resize(lineBufferLength_);\n> +\t\tfor (unsigned int i = 0; i < threadCount_; i++)\n> +\t\t\tfor (unsigned int j = 0; j <= inputConfig_.patternSize.height; j++)\n> +\t\t\t\tthreadData_[i].lineBuffers[j].resize(lineBufferLength_);\n>  \t}\n>  \n>  \treturn 0;\n> @@ -600,7 +601,8 @@ DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size\n>  \treturn std::make_tuple(stride, stride * size.height);\n>  }\n>  \n> -void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n> +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[],\n> +\t\t\t\t  DebayerCpuThreadData *threadData)\n>  {\n>  \tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n>  \n> @@ -608,14 +610,14 @@ void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n>  \t\treturn;\n>  \n>  \tfor (unsigned int i = 0; i < patternHeight; i++) {\n> -\t\tmemcpy(lineBuffers_[i].data(),\n> +\t\tmemcpy(threadData->lineBuffers[i].data(),\n>  \t\t       linePointers[i + 1] - lineBufferPadding_,\n>  \t\t       lineBufferLength_);\n> -\t\tlinePointers[i + 1] = lineBuffers_[i].data() + lineBufferPadding_;\n> +\t\tlinePointers[i + 1] = threadData->lineBuffers[i].data() + lineBufferPadding_;\n>  \t}\n>  \n>  \t/* Point lineBufferIndex_ to first unused lineBuffer */\n> -\tlineBufferIndex_ = patternHeight;\n> +\tthreadData->lineBufferIndex = patternHeight;\n>  }\n>  \n>  void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)\n> @@ -629,66 +631,78 @@ void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t\n>  \t\t\t\t      (patternHeight / 2) * (int)inputConfig_.stride;\n>  }\n>  \n> -void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n> +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[],\n> +\t\t\t\tDebayerCpuThreadData *threadData)\n>  {\n>  \tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n>  \n>  \tif (!enableInputMemcpy_)\n>  \t\treturn;\n>  \n> -\tmemcpy(lineBuffers_[lineBufferIndex_].data(),\n> +\tmemcpy(threadData->lineBuffers[threadData->lineBufferIndex].data(),\n>  \t       linePointers[patternHeight] - lineBufferPadding_,\n>  \t       lineBufferLength_);\n> -\tlinePointers[patternHeight] = lineBuffers_[lineBufferIndex_].data() + lineBufferPadding_;\n> +\tlinePointers[patternHeight] = threadData->lineBuffers[threadData->lineBufferIndex].data() + lineBufferPadding_;\n>  \n> -\tlineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);\n> +\tthreadData->lineBufferIndex = (threadData->lineBufferIndex + 1) % (patternHeight + 1);\n>  }\n>  \n> -void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)\n> +void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst,\n> +\t\t\t  DebayerCpuThreadData *threadData)\n>  {\n> -\tunsigned int yEnd = window_.height;\n>  \t/* Holds [0] previous- [1] current- [2] next-line */\n>  \tconst uint8_t *linePointers[3];\n>  \n>  \t/* Adjust src to top left corner of the window */\n> -\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> +\tsrc += (window_.y + threadData->yStart) * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n>  \n>  \t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> -\tif (window_.y) {\n> +\tif (window_.y + threadData->yStart) {\n>  \t\tlinePointers[1] = src - inputConfig_.stride; /* previous-line */\n>  \t\tlinePointers[2] = src;\n>  \t} else {\n> -\t\t/* window_.y == 0, use the next line as prev line */\n> +\t\t/* Top line, use the next line as prev line */\n>  \t\tlinePointers[1] = src + inputConfig_.stride;\n>  \t\tlinePointers[2] = src;\n> +\t}\n> +\n> +\tif (window_.y == 0 && threadData->yEnd == window_.height) {\n>  \t\t/*\n>  \t\t * Last 2 lines also need special handling.\n>  \t\t * (And configure() ensures that yEnd >= 2.)\n>  \t\t */\n> -\t\tyEnd -= 2;\n> +\t\tthreadData->yEnd -= 2;\n> +\t\tthreadData->processLastLinesSeperately = true;\n\ns/Seperately/Separately/\n\n> +\t} else {\n> +\t\tthreadData->processLastLinesSeperately = false;\n>  \t}\n>  \n> -\tsetupInputMemcpy(linePointers);\n> +\tsetupInputMemcpy(linePointers, threadData);\n>  \n> -\tfor (unsigned int y = 0; y < yEnd; y += 2) {\n> +\t/*\n> +\t * Note y is the line-number *inside* the window, since stats_' window\n> +\t * is the stats window inside/relative to the debayer window. IOW for\n> +\t * single thread rendering y goes from 0 - window_.height.\n> +\t */\n> +\tfor (unsigned int y = threadData->yStart; y < threadData->yEnd; y += 2) {\n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n>  \t\tstats_->processLine0(frame, y, linePointers, &statsBuffer_);\n>  \t\t(this->*debayer0_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n>  \t\t(this->*debayer1_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n>  \t}\n>  \n> -\tif (window_.y == 0) {\n> +\tif (threadData->processLastLinesSeperately) {\n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> -\t\tstats_->processLine0(frame, yEnd, linePointers, &statsBuffer_);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n> +\t\tstats_->processLine0(frame, threadData->yEnd, linePointers, &statsBuffer_);\n>  \t\t(this->*debayer0_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n> @@ -702,7 +716,8 @@ void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)\n>  \t}\n>  }\n>  \n> -void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)\n> +void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst,\n> +\t\t\t  DebayerCpuThreadData *threadData)\n>  {\n>  \t/*\n>  \t * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line\n> @@ -711,7 +726,7 @@ void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)\n>  \tconst uint8_t *linePointers[5];\n>  \n>  \t/* Adjust src to top left corner of the window */\n> -\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> +\tsrc += (window_.y + threadData->yStart) * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n>  \n>  \t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n>  \tlinePointers[1] = src - 2 * inputConfig_.stride;\n> @@ -719,31 +734,36 @@ void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)\n>  \tlinePointers[3] = src;\n>  \tlinePointers[4] = src + inputConfig_.stride;\n>  \n> -\tsetupInputMemcpy(linePointers);\n> +\tsetupInputMemcpy(linePointers, threadData);\n>  \n> -\tfor (unsigned int y = 0; y < window_.height; y += 4) {\n> +\t/*\n> +\t * Note y is the line-number *inside* the window, since stats_' window\n> +\t * is the stats window inside/relative to the debayer window. IOW for\n> +\t * single thread rendering y goes from 0 - window_.height.\n\ns/-/to/\n\n(The same in process4.)\n\n> +\t */\n> +\tfor (unsigned int y = threadData->yStart; y < threadData->yEnd; y += 4) {\n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n>  \t\tstats_->processLine0(frame, y, linePointers, &statsBuffer_);\n>  \t\t(this->*debayer0_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n>  \t\t(this->*debayer1_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n>  \t\tstats_->processLine2(frame, y, linePointers, &statsBuffer_);\n>  \t\t(this->*debayer2_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n>  \n>  \t\tshiftLinePointers(linePointers, src);\n> -\t\tmemcpyNextLine(linePointers);\n> +\t\tmemcpyNextLine(linePointers, threadData);\n>  \t\t(this->*debayer3_)(dst, linePointers);\n>  \t\tsrc += inputConfig_.stride;\n>  \t\tdst += outputConfig_.stride;\n> @@ -868,10 +888,12 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output\n>  \n>  \tstats_->startFrame(frame, &statsBuffer_, 1);\n>  \n> +\tthreadData_[0].yStart = 0;\n> +\tthreadData_[0].yEnd = window_.height;\n>  \tif (inputConfig_.patternSize.height == 2)\n> -\t\tprocess2(frame, in.planes()[0].data(), out.planes()[0].data());\n> +\t\tprocess2(frame, in.planes()[0].data(), out.planes()[0].data(), &threadData_[0]);\n>  \telse\n> -\t\tprocess4(frame, in.planes()[0].data(), out.planes()[0].data());\n> +\t\tprocess4(frame, in.planes()[0].data(), out.planes()[0].data(), &threadData_[0]);\n>  \n>  \tmetadata.planes()[0].bytesused = out.planes()[0].size();\n>  \n> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n> index 8abf5168..800b018c 100644\n> --- a/src/libcamera/software_isp/debayer_cpu.h\n> +++ b/src/libcamera/software_isp/debayer_cpu.h\n> @@ -74,6 +74,19 @@ private:\n>  \t */\n>  \tusing debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);\n>  \n> +\t/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */\n\nNot a matter of this patch but I wonder why 5?\n\n> +\tstatic constexpr unsigned int kMaxLineBuffers = 5;\n> +\n> +\t/* Per render thread data */\n> +\tstruct DebayerCpuThreadData {\n> +\t\tunsigned int yStart;\n> +\t\tunsigned int yEnd;\n> +\t\tstd::vector<uint8_t> lineBuffers[kMaxLineBuffers];\n> +\t\tunsigned int lineBufferIndex;\n> +\t\t/* Stored here to avoid causing register pressure in inner loop */\n\nWhat inner loop?\n\n> +\t\tbool processLastLinesSeperately;\n> +\t};\n> +\n>  \t/* 8-bit raw bayer format */\n>  \ttemplate<bool addAlphaByte, bool ccmEnabled>\n>  \tvoid debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> @@ -105,17 +118,14 @@ private:\n>  \tint setDebayerFunctions(PixelFormat inputFormat,\n>  \t\t\t\tPixelFormat outputFormat,\n>  \t\t\t\tbool ccmEnabled);\n> -\tvoid setupInputMemcpy(const uint8_t *linePointers[]);\n> +\tvoid setupInputMemcpy(const uint8_t *linePointers[], DebayerCpuThreadData *threadData);\n>  \tvoid shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n> -\tvoid memcpyNextLine(const uint8_t *linePointers[]);\n> -\tvoid process2(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> -\tvoid process4(uint32_t frame, const uint8_t *src, uint8_t *dst);\n> +\tvoid memcpyNextLine(const uint8_t *linePointers[], DebayerCpuThreadData *threadData);\n> +\tvoid process2(uint32_t frame, const uint8_t *src, uint8_t *dst, DebayerCpuThreadData *threadData);\n> +\tvoid process4(uint32_t frame, const uint8_t *src, uint8_t *dst, DebayerCpuThreadData *threadData);\n>  \tvoid updateGammaTable(const DebayerParams &params);\n>  \tvoid updateLookupTables(const DebayerParams &params);\n>  \n> -\t/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */\n> -\tstatic constexpr unsigned int kMaxLineBuffers = 5;\n> -\n>  \tstatic constexpr unsigned int kRGBLookupSize = 256;\n>  \tstatic constexpr unsigned int kGammaLookupSize = 1024;\n>  \tstruct CcmColumn {\n> @@ -143,12 +153,14 @@ private:\n>  \tdebayerFn debayer3_;\n>  \tRectangle window_;\n>  \tstd::unique_ptr<SwStatsCpu> stats_;\n> -\tstd::vector<uint8_t> lineBuffers_[kMaxLineBuffers];\n>  \tunsigned int lineBufferLength_;\n>  \tunsigned int lineBufferPadding_;\n> -\tunsigned int lineBufferIndex_;\n>  \tunsigned int xShift_; /* Offset of 0/1 applied to window_.x */\n>  \tbool enableInputMemcpy_;\n> +\n> +\tstatic constexpr unsigned int kMaxThreads = 4;\n> +\tstruct DebayerCpuThreadData threadData_[kMaxThreads];\n\nI think std::array is preferred.\n\n> +\tunsigned int threadCount_;\n>  };\n>  \n>  } /* namespace libcamera */","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 50042C0DA4\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 17 Feb 2026 21:22:41 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 6B2A662210;\n\tTue, 17 Feb 2026 22:22:40 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.133.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id A093D61FA0\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 17 Feb 2026 22:22:38 +0100 (CET)","from mail-wm1-f69.google.com (mail-wm1-f69.google.com\n\t[209.85.128.69]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-74-rElEsxqEPhmsB_2hClnX4A-1; Tue, 17 Feb 2026 16:22:35 -0500","by mail-wm1-f69.google.com with SMTP id\n\t5b1f17b1804b1-4837cee2e9bso27165805e9.3\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 17 Feb 2026 13:22:35 -0800 (PST)","from mzamazal-thinkpadp1gen7.tpbc.csb\n\t(ip-77-48-47-2.net.vodafone.cz. [77.48.47.2])\n\tby smtp.gmail.com with ESMTPSA id\n\tffacd0b85a97d-43796a5b2d1sm40079811f8f.4.2026.02.17.13.22.32\n\t(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n\tTue, 17 Feb 2026 13:22:32 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"CndlisiD\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1771363357;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=rXxPNOtrnCAQtZLFSYxBqgGKYQIZtpaYb8qwmOn8EvQ=;\n\tb=CndlisiDv6h97ApUOIMLFV+UWanBMoCyvrMVKr/Ux/TxaYuTCHwIzLgHl17Jn+gsMPdEF0\n\tWlqfBRLDhoTow1H1gBX+BmamT/zQT8YfYwfJOGTGJg5V3w9QoxKzVoxJYHm3epcBRl9Wzv\n\t27TFK3CZuc0e3y3ODbvUDw3m/4tAjHM=","X-MC-Unique":"rElEsxqEPhmsB_2hClnX4A-1","X-Mimecast-MFC-AGG-ID":"rElEsxqEPhmsB_2hClnX4A_1771363354","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1771363354; x=1771968154;\n\th=mime-version:user-agent:message-id:date:references:in-reply-to\n\t:subject:cc:to:from:x-gm-gg:x-gm-message-state:from:to:cc:subject\n\t:date:message-id:reply-to;\n\tbh=rXxPNOtrnCAQtZLFSYxBqgGKYQIZtpaYb8qwmOn8EvQ=;\n\tb=jP6pWcVuZSD20ocRrrBxt4ZrSQZ51ZaeTctpxv7UrAtjvlfw2Ikps7640Is/Ny5seh\n\tbd9FCwtD6A7o2VwhZg5yKeGjJLpd1jNpahFmetQMvSumWbqSalxyM4gaQI77sla1rhou\n\t7Yh2Wh6hFukdrUDUaFbhYDzK0yk1reKzqGRO2yY3H+aCjdzkqXVnlKN6hI/A5HqxSKW/\n\tvUCKoYv+r1X8Hx48uQreUnVsqyGZiNPdAAq7RbuWWoym7FNnqUg0DgMCmkpE6mj0U3Jv\n\tWQq56mfEs10MxC2hLOfoZP4tzyJfZNfDDBxxkGfW+Baq1nodMkv/JMIXTFCmH0/0kUoB\n\tyBnA==","X-Gm-Message-State":"AOJu0YzxDKBLI07Fa3JOGjgC/utzGvXR+bpakYo8cReLfTfISk/0I5nh\n\toqPioTXcHAtYI/Y3Wqv8JOuZZIt9ZeKLd3m3ElNTMz0qHnUCah58yRGawt/OOffQ3XO0tuhbGr9\n\toaT70+TawIT1dAI8KwicFITonl1CtZIqXhH4UsZzmNjYcnbodAqBVXT5S5Jfxw65Ia/o46Z8niq\n\t1vpIZDqKkiumdX51/KzqP0nTuY/lFmJRnUQzKHXHI9MPZwXGHaTPXLt4R5lkY=","X-Gm-Gg":"AZuq6aJxrZnO/ESyVSiKTXCA5BtSJdkjJJYCpscaBjwJcVChmD7cECOHc7oy61XxOjx\n\tjkkT9p21bH1J/WPNt/bXc/KkfVJM+QjP9Wqo4WpWVDl8ynvf4dS3eAxsVDDE9lVEGhC0M64z9MO\n\tNCFNof5xBc91OwNMPbN6IGygKGFocJbgityiKw3DhAqEe/yTunmODpPyRVCA7HP5g4/RG+2/8/l\n\t1M8keFo3LHMFw7iWlTN3hMEE6iWTLDS1sb605CMYI0tHbPaz6fE7/FuJbg4Y+HNOQUBOL8NEcOZ\n\tYuXmvMNK6Qd+cy6kC/uJXITxhSqMuXMdOSqoFgki154IoZySTPQpnRT41tPpB1jsfIAu25QKE8q\n\t9eK3mkI03CVZFDQ9K0N4nRna2KpavfK7gHTa+YHEiRgxdpF0C4TwXX1WQuxzK5/z90Ivo1YRh80\n\tk=","X-Received":["by 2002:a05:600d:108:20b0:483:80b0:b245 with SMTP id\n\t5b1f17b1804b1-48380b0b344mr142344745e9.9.1771363353775; \n\tTue, 17 Feb 2026 13:22:33 -0800 (PST)","by 2002:a05:600d:108:20b0:483:80b0:b245 with SMTP id\n\t5b1f17b1804b1-48380b0b344mr142344325e9.9.1771363353159; \n\tTue, 17 Feb 2026 13:22:33 -0800 (PST)"],"From":"Milan Zamazal <mzamazal@redhat.com>","To":"Hans de Goede <johannes.goede@oss.qualcomm.com>","Cc":"libcamera-devel@lists.libcamera.org","Subject":"Re: [PATCH 2/5] software_isp: debayer_cpu: Add per render thread\n\tdata","In-Reply-To":"<20260216190204.106922-3-johannes.goede@oss.qualcomm.com> (Hans\n\tde Goede's message of \"Mon, 16 Feb 2026 20:02:01 +0100\")","References":"<20260216190204.106922-1-johannes.goede@oss.qualcomm.com>\n\t<20260216190204.106922-3-johannes.goede@oss.qualcomm.com>","Date":"Tue, 17 Feb 2026 22:22:31 +0100","Message-ID":"<85cy23awvc.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","User-Agent":"Gnus/5.13 (Gnus v5.13)","MIME-Version":"1.0","X-Mimecast-Spam-Score":"0","X-Mimecast-MFC-PROC-ID":"QQ-8FDNwVgYfQvI9pkw8ActiUUtBiBF-TtOEWSwoTfc_1771363354","X-Mimecast-Originator":"redhat.com","Content-Type":"text/plain","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":38274,"web_url":"https://patchwork.libcamera.org/comment/38274/","msgid":"<61d8ba2e-8ac8-4922-975d-2fba07efb42f@oss.qualcomm.com>","date":"2026-02-23T15:34:56","subject":"Re: [PATCH 2/5] software_isp: debayer_cpu: Add per render thread\n\tdata","submitter":{"id":242,"url":"https://patchwork.libcamera.org/api/people/242/","name":"Hans de Goede","email":"johannes.goede@oss.qualcomm.com"},"content":"Hi,\n\nOn 17-Feb-26 10:22 PM, Milan Zamazal wrote:\n> Hi Hans,\n> \n> thank you for the patch.\n> \n> Looks basically correct to me, some comments below.\n> \n> Hans de Goede <johannes.goede@oss.qualcomm.com> writes:\n> \n>> Add a DebayerCpuThreadData data struct and use this in the inner render\n>> loop. This contains data which needs to be separate per thread.\n>>\n>> This is a preparation patch for making DebayerCpu support multi-threading.\n>>\n>> Note this passed the DebayerCpuThreadData with a pointer rather then by\n> \n> s/then/than/\n\nNo longer relevant for v2, but I did end up doing a couple other s/then/than/\nfixes for v2.\n\n...\n\n>> +\tif (window_.y == 0 && threadData->yEnd == window_.height) {\n>>  \t\t/*\n>>  \t\t * Last 2 lines also need special handling.\n>>  \t\t * (And configure() ensures that yEnd >= 2.)\n>>  \t\t */\n>> -\t\tyEnd -= 2;\n>> +\t\tthreadData->yEnd -= 2;\n>> +\t\tthreadData->processLastLinesSeperately = true;\n> \n> s/Seperately/Separately/\n\nNo longer relevant for v2 (which is significantly reworked).\n\n...\n\n>> -\tfor (unsigned int y = 0; y < window_.height; y += 4) {\n>> +\t/*\n>> +\t * Note y is the line-number *inside* the window, since stats_' window\n>> +\t * is the stats window inside/relative to the debayer window. IOW for\n>> +\t * single thread rendering y goes from 0 - window_.height.\n> \n> s/-/to/\n> \n> (The same in process4.)\n\nAck both fixed for v2.\n\n...\n\n>> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n>> index 8abf5168..800b018c 100644\n>> --- a/src/libcamera/software_isp/debayer_cpu.h\n>> +++ b/src/libcamera/software_isp/debayer_cpu.h\n>> @@ -74,6 +74,19 @@ private:\n>>  \t */\n>>  \tusing debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);\n>>  \n>> +\t/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */\n> \n> Not a matter of this patch but I wonder why 5?\n\nthis is for a mixed RGB-IR pattern which requires 2 lines above and below\nthe current line to get all colors. ATM the whole process4() support is\nunused I've out of tree patches for this here:\n\nhttps://github.com/jwrdegoede/libcamera/commits/ov01a1s/\n\nbut those are waiting for the new bayer-pattern control to allow\ncommunicating the special IGIG_GBGR_IGIG_GRGB bayer pattern to\nuserspace.\n\n...\n\n>> +\n>> +\tstatic constexpr unsigned int kMaxThreads = 4;\n>> +\tstruct DebayerCpuThreadData threadData_[kMaxThreads];\n> \n> I think std::array is preferred.\n\nAck.\n\nRegards,\n\nHans","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id A359BC0DA4\n\tfor <parsemail@patchwork.libcamera.org>;\n\tMon, 23 Feb 2026 15:35:04 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 491CE62287;\n\tMon, 23 Feb 2026 16:35:03 +0100 (CET)","from mx0a-0031df01.pphosted.com (mx0a-0031df01.pphosted.com\n\t[205.220.168.131])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 3D669621CE\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tMon, 23 Feb 2026 16:35:01 +0100 (CET)","from pps.filterd (m0279864.ppops.net [127.0.0.1])\n\tby mx0a-0031df01.pphosted.com (8.18.1.11/8.18.1.11) with ESMTP id\n\t61NAYB1I561294 for <libcamera-devel@lists.libcamera.org>;\n\tMon, 23 Feb 2026 15:34:59 GMT","from mail-qv1-f70.google.com (mail-qv1-f70.google.com\n\t[209.85.219.70])\n\tby mx0a-0031df01.pphosted.com (PPS) with ESMTPS id 4cgn8r8u6a-1\n\t(version=TLSv1.3 cipher=TLS_AES_128_GCM_SHA256 bits=128 verify=NOT)\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tMon, 23 Feb 2026 15:34:59 +0000 (GMT)","by mail-qv1-f70.google.com with SMTP id\n\t6a1803df08f44-89502dfd7b4so479600856d6.1\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tMon, 23 Feb 2026 07:34:59 -0800 (PST)","from [10.40.99.10] ([78.108.130.194])\n\tby smtp.gmail.com with ESMTPSA id\n\ta640c23a62f3a-b9084c84c37sm334395566b.23.2026.02.23.07.34.56\n\t(version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);\n\tMon, 23 Feb 2026 07:34:57 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (2048-bit key;\n\tunprotected) header.d=qualcomm.com header.i=@qualcomm.com\n\theader.b=\"DZ84rT0/\"; dkim=pass (2048-bit key;\n\tunprotected) header.d=oss.qualcomm.com header.i=@oss.qualcomm.com\n\theader.b=\"gfIP0U/H\"; dkim-atps=neutral","DKIM-Signature":["v=1; a=rsa-sha256; c=relaxed/relaxed; d=qualcomm.com; h=\n\tcc:content-transfer-encoding:content-type:date:from:in-reply-to\n\t:message-id:mime-version:references:subject:to; s=qcppdkim1; bh=\n\tK72vzkjkIpcByKFImEsDtcC2zOVLVUTqaPB4fYd7wII=; b=DZ84rT0/yueRTP3g\n\t8zqLcxUFuFadB8wA5NPBjyJm7t2mNDMIO4KTVDvGeRserCO4ApM3FKW823xwdC+I\n\tcF+OM2cX/Am5Y0p1UdmSXcVrw/iBvZr8BsFthLqM/7Tt6GE0PfxeHpdWMISxusSn\n\tEJRcTUUSmhw1J4eDh2bhFSydkXYoXGUh0MIEmq6G+LBXgV60cJCzmZKqPVs1hlE3\n\tNLf8b7PRIjRabxZRyzYOmR/ATnpaGL3u7ZehKUiy0fQGZ1Dn+4uRxqA19WwbS3cn\n\tPPTNv3Nl4XdexoU1AepLP3oDV3gRgc6fhnxELW0nFZnTQuVhW5nT5ct8l6TZsTvQ\n\t511U8Q==","v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=oss.qualcomm.com; s=google; t=1771860898; x=1772465698;\n\tdarn=lists.libcamera.org; \n\th=content-transfer-encoding:in-reply-to:content-language:references\n\t:cc:to:subject:from:user-agent:mime-version:date:message-id:from:to\n\t:cc:subject:date:message-id:reply-to;\n\tbh=K72vzkjkIpcByKFImEsDtcC2zOVLVUTqaPB4fYd7wII=;\n\tb=gfIP0U/H9MIrOWk08nPT4xFrdCnYSyUWgDKfw5G7MSsI663ncrRp/7X4Ol4n1UzEGt\n\tI/porUOSBcatplwlrjEOafBYEmqvAHqoYFyN0+HbXOdtrNlVHAGG9J50l6LjHTDdLZxa\n\tHF7hDzbKWeWfQ0+OexTBbWla78QA1uYYy6kVlb1jyTYwd+ClAjyJ2OGK1Oe64ZvR41ut\n\ttwXe6eyFeKH/r+dcQ0zL6QRChPu+Q36q/C8/aGzaQoZW+59FUDUXY2DaMSDwuLUfjhW4\n\t+XAGDnpOHRD0yyJpQ4iYjBtIq+3F4qWaAtwlL15oNig4j8KMeOF+s+V3s/71F5QmtOlJ\n\tgung=="],"X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1771860898; x=1772465698;\n\th=content-transfer-encoding:in-reply-to:content-language:references\n\t:cc:to:subject:from:user-agent:mime-version:date:message-id:x-gm-gg\n\t:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;\n\tbh=K72vzkjkIpcByKFImEsDtcC2zOVLVUTqaPB4fYd7wII=;\n\tb=I+lzfnibZ1cONuSMxkYBT1CrQHSPav8D4576OFbM1b9AFbiN8Ebjfx4sGvn1nEumKZ\n\tYiaRPCfeMV2oikPr6atQ41n0aYK9hvQkZvNi6njR1x9wYSzsk32H9jkNVvPl+zaHmJT1\n\t8AP7xJW7wNiOE38CBIclFG56KotJ70Tn4W+sdDwhRx+ys+pmUWkwlsagvxdorg2cyQfg\n\tw4tr45gcAalg/UXC7zaZCWiHVY6Pt1AgdWnnFF6CrGuxr2FaOk0k74PUnT+NJew0m8z9\n\tJE18QRVAJB03cJ7SplFD4KxSGlhcg07NFVdm473mIUiOSpCj3G0KtFqta3OMYXKHLhkl\n\tmzVw==","X-Gm-Message-State":"AOJu0Yz/cvAj0I6yFsFVeBQxgUfg+9CPxK0kJmbj0MFuU7MPQsHGw4Iw\n\tHqDs4fmydl9LIfCrPe5yiZvQTJZeGkuNn6JkrKVN6nwADeVpPQQKsoyZfKaBl+9foSYHf4gpyq+\n\tp8OgG8jJb+BB5/Kapb7FmphVZOvfP6wmr8jdRUjtZF+VRYrDpLK8D0FWeXPB8J6Momi0mlocDYI\n\tO2zO1ofd1D","X-Gm-Gg":"AZuq6aIVIKrTcgL1iui27FiHadWtzFGeAiQMhId0/UUiw31eilWFasMSOOBUO00exur\n\t9k/tVMQbeW8525q4XuwdyVWQkPvsLGFffb8gFUvPAesiamN9kNxoxUer9hI8ZBB4Apdl8Ibr+SX\n\tF+XQt+yBNUs2xtlAzHk85yrRpL1y/rKh2MOPw/TtpCrkhpAM6zanf52WPRq6ukR3ZG56r2gCB6a\n\tFtbL95GNnqut75KFKdZoBzpbzmv5dNKQMHaIKQmp3Fpp4F542g6wc1ERGouFZ6XOpx4obslNrE7\n\tRRQ1wmOzf6z7/yfZvq7Qv0hqwfBc4Q4g6NW+X3kfSfvQvUOZ7mgUv/Oy8J55KsQATKyxrTsQ9PV\n\tjF/+QT7fwEKj/blyAUS08LaY6T42QqC+tYnxuAQGQ","X-Received":["by 2002:a05:620a:d86:b0:8c6:a5c7:a7ee with SMTP id\n\taf79cd13be357-8cb8ca67456mr1117761285a.53.1771860898145; \n\tMon, 23 Feb 2026 07:34:58 -0800 (PST)","by 2002:a05:620a:d86:b0:8c6:a5c7:a7ee with SMTP id\n\taf79cd13be357-8cb8ca67456mr1117756285a.53.1771860897638; \n\tMon, 23 Feb 2026 07:34:57 -0800 (PST)"],"Message-ID":"<61d8ba2e-8ac8-4922-975d-2fba07efb42f@oss.qualcomm.com>","Date":"Mon, 23 Feb 2026 16:34:56 +0100","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","From":"Hans de Goede <johannes.goede@oss.qualcomm.com>","Subject":"Re: [PATCH 2/5] software_isp: debayer_cpu: Add per render thread\n\tdata","To":"Milan Zamazal <mzamazal@redhat.com>","Cc":"libcamera-devel@lists.libcamera.org","References":"<20260216190204.106922-1-johannes.goede@oss.qualcomm.com>\n\t<20260216190204.106922-3-johannes.goede@oss.qualcomm.com>\n\t<85cy23awvc.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","Content-Language":"en-US, nl","In-Reply-To":"<85cy23awvc.fsf@mzamazal-thinkpadp1gen7.tpbc.csb>","Content-Type":"text/plain; charset=UTF-8","Content-Transfer-Encoding":"7bit","X-Proofpoint-ORIG-GUID":"32PlCn2e5yPNbMeFYmi35FPJZYb2mvRm","X-Authority-Analysis":"v=2.4 cv=V7twEOni c=1 sm=1 tr=0 ts=699c73a3 cx=c_pps\n\ta=oc9J++0uMp73DTRD5QyR2A==:117 a=rrvG0T/C2D967D07Ol03YQ==:17\n\ta=IkcTkHD0fZMA:10 a=HzLeVaNsDn8A:10 a=s4-Qcg_JpJYA:10\n\ta=VkNPw1HP01LnGYTKEx00:22 a=u7WPNUs3qKkmUXheDGA7:22\n\ta=DJpcGTmdVt4CTyJn9g5Z:22\n\ta=NEAV23lmAAAA:8 a=EUspDBNiAAAA:8 a=XbWfKFpA5ZfWnj73MYgA:9\n\ta=QEXdDO2ut3YA:10 a=iYH6xdkBrDN1Jqds4HTS:22","X-Proofpoint-Spam-Details-Enc":"AW1haW4tMjYwMjIzMDEzMyBTYWx0ZWRfX4R/SnAvvkDlg\n\tNtMefXpA+3J1rcnn1kwaTKmQBzhALW/52mAysERi5g1JFFXTvNlLQcAFDEWD3+wOxHGM+A1ORuR\n\tblWkyHl5UQ4BjDVnoKePy0SZFp1ehG7IN+QAUr0KpfSywu8eoQf7ru2VznXURMgYhYE3dkLf9bG\n\tJ1Oaq5E/dedfnaHONUfX8uqp6KZD+9aShRozLwm9YfHp9gZh1fD24m3WLUFS5L2Sd+DnWSXzH8C\n\tbo11x+4kKYegaegkszV8ENR9OSpWOJ1UlnYV5P7vIIsOgpjnyY4Gb2lDOZOPL9N22tObZhOn3hi\n\tBfC1t8SchfLubDHdMIXqA9PMmJIYuXQOBVtqz+osV2ePtMzOqp14l+gQE8+2sJmm4MupDcOndeL\n\tvDK4q/tNiZFq03GUeni/Zrv5rLympikPgJrzed86RSmn+zjO0Aw5C9O6XuSjT6BufkrF+/rg/Q4\n\tAN5+1i2JaVRNDt/Xk8A==","X-Proofpoint-GUID":"32PlCn2e5yPNbMeFYmi35FPJZYb2mvRm","X-Proofpoint-Virus-Version":"vendor=baseguard\n\tengine=ICAP:2.0.293, Aquarius:18.0.1121, Hydra:6.1.51,\n\tFMLib:17.12.100.49\n\tdefinitions=2026-02-23_03,2026-02-23_03,2025-10-01_01","X-Proofpoint-Spam-Details":"rule=outbound_notspam policy=outbound score=0\n\tclxscore=1015 spamscore=0 lowpriorityscore=0 suspectscore=0\n\tphishscore=0\n\timpostorscore=0 bulkscore=0 priorityscore=1501 malwarescore=0\n\tadultscore=0\n\tclassifier=typeunknown authscore=0 authtc= authcc= route=outbound\n\tadjust=0\n\treason=mlx scancount=1 engine=8.22.0-2602130000\n\tdefinitions=main-2602230133","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]