[v4] libcamera: debayer_cpu: Sync DMABUFs
diff mbox series

Message ID 20240916161231.238116-1-robert.mader@collabora.com
State Accepted
Headers show
Series
  • [v4] libcamera: debayer_cpu: Sync DMABUFs
Related show

Commit Message

Robert Mader Sept. 16, 2024, 4:12 p.m. UTC
Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
correct output. Not doing so currently results in occasional tearing
and/or backlashes in GL/VK clients that use the buffers directly for
rendering.

An alternative approach to have the sync code in `MappedFrameBuffer` was
considered but rejected for now, in order to allow clients more
flexibility.

Signed-off-by: Robert Mader <robert.mader@collabora.com>

---

Changes in v4:
 - Fixed errno handling
 - Added sync flags to error message
 - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace

Changes in v3:
 - add syncBufferForCPU() helper function

Changes in v2:
 - sync input buffer as well
 - update commit title accordingly
 - small changes to the commit message
 - linting fixes, should pass now
---
 src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Kieran Bingham Sept. 17, 2024, 12:01 p.m. UTC | #1
Quoting Robert Mader (2024-09-16 17:12:31)
> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
> correct output. Not doing so currently results in occasional tearing
> and/or backlashes in GL/VK clients that use the buffers directly for
> rendering.
> 
> An alternative approach to have the sync code in `MappedFrameBuffer` was
> considered but rejected for now, in order to allow clients more
> flexibility.
> 
> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> 
> ---
> 
> Changes in v4:
>  - Fixed errno handling
>  - Added sync flags to error message
>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
> 
> Changes in v3:
>  - add syncBufferForCPU() helper function
> 
> Changes in v2:
>  - sync input buffer as well
>  - update commit title accordingly
>  - small changes to the commit message
>  - linting fixes, should pass now
> ---
>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> index 077f7f4b..96cf2c71 100644
> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -12,8 +12,11 @@
>  #include "debayer_cpu.h"
>  
>  #include <stdlib.h>
> +#include <sys/ioctl.h>
>  #include <time.h>
>  
> +#include <linux/dma-buf.h>
> +
>  #include <libcamera/formats.h>
>  
>  #include "libcamera/internal/bayer_format.h"
> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>         }
>  }
>  
> +namespace {
> +
> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)

Now these are in an annoymous namespace, they don't need to be static -
but that can be fixed while applying perhaps or ignored for now if no
further version is required otherwise.

I'm happy to merge, but under software_isp/* I think we should have an
Acked-by/Reviewed-by by one of the SoftISP developers.

Milan, Hans ? Are you happy with this patch? Is there any impact with
the Intel devices for instance?

--
Kieran




> +{
> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
> +               const int fd = plane.fd.get();
> +               struct dma_buf_sync sync = { syncFlags };
> +               int ret;
> +
> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
> +               if (ret < 0) {
> +                       ret = errno;
> +                       LOG(Debayer, Error)
> +                               << "Syncing buffer FD " << fd << " with flags "
> +                               << syncFlags << " failed: " << strerror(-ret);
> +               }
> +       }
> +}
> +
>  static inline int64_t timeDiff(timespec &after, timespec &before)
>  {
>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>  }
>  
> +} /* namespace */
> +
>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>  {
>         timespec frameStartTime;
> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>         }
>  
> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
> +
>         green_ = params.green;
>         red_ = swapRedBlueGains_ ? params.blue : params.red;
>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>  
>         metadata.planes()[0].bytesused = out.planes()[0].size();
>  
> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
> +
>         /* Measure before emitting signals */
>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
> -- 
> 2.46.0
>
Milan Zamazal Sept. 18, 2024, 9:55 a.m. UTC | #2
Kieran Bingham <kieran.bingham@ideasonboard.com> writes:

> Quoting Robert Mader (2024-09-16 17:12:31)
>> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
>> correct output. Not doing so currently results in occasional tearing
>
>> and/or backlashes in GL/VK clients that use the buffers directly for
>> rendering.
>> 
>> An alternative approach to have the sync code in `MappedFrameBuffer` was
>> considered but rejected for now, in order to allow clients more
>> flexibility.
>> 
>> Signed-off-by: Robert Mader <robert.mader@collabora.com>
>> 
>> ---
>> 
>> Changes in v4:
>>  - Fixed errno handling
>>  - Added sync flags to error message
>>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
>> 
>> Changes in v3:
>>  - add syncBufferForCPU() helper function
>> 
>> Changes in v2:
>>  - sync input buffer as well
>>  - update commit title accordingly
>>  - small changes to the commit message
>>  - linting fixes, should pass now
>> ---
>>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>>  1 file changed, 30 insertions(+)
>> 
>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
>> index 077f7f4b..96cf2c71 100644
>> --- a/src/libcamera/software_isp/debayer_cpu.cpp
>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
>> @@ -12,8 +12,11 @@
>>  #include "debayer_cpu.h"
>>  
>>  #include <stdlib.h>
>> +#include <sys/ioctl.h>
>>  #include <time.h>
>>  
>> +#include <linux/dma-buf.h>
>> +
>>  #include <libcamera/formats.h>
>>  
>>  #include "libcamera/internal/bayer_format.h"
>> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>>         }
>>  }
>>  
>> +namespace {
>> +
>> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
>
> Now these are in an annoymous namespace, they don't need to be static -
> but that can be fixed while applying perhaps or ignored for now if no
> further version is required otherwise.
>
> I'm happy to merge, but under software_isp/* I think we should have an
> Acked-by/Reviewed-by by one of the SoftISP developers.
>
> Milan, Hans ? Are you happy with this patch? Is there any impact with
> the Intel devices for instance?

I'm fine with the patch (with the exception below) and it works fine on
Debix (with only ~0,5% performance penalty).  It would be nice if Hans
tested it on Intel.

> --
> Kieran
>
>
>
>
>> +{
>> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
>> +               const int fd = plane.fd.get();
>> +               struct dma_buf_sync sync = { syncFlags };
>> +               int ret;
>> +
>> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
>> +               if (ret < 0) {
>> +                       ret = errno;
>> +                       LOG(Debayer, Error)
>> +                               << "Syncing buffer FD " << fd << " with flags "
>> +                               << syncFlags << " failed: " << strerror(-ret);

I think `ret' and not `-ret' should be used here.  And I would drop
`ret = errno' above and use errno here directly.  I'm not sure why
Kieran suggested exactly this -- using strerror is right but changing
the sign seems wrong (tested in my environment).

>> +               }
>> +       }
>> +}
>> +
>>  static inline int64_t timeDiff(timespec &after, timespec &before)
>>  {
>>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
>>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>>  }
>>  
>> +} /* namespace */
>> +
>>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>>  {
>>         timespec frameStartTime;
>> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>>         }
>>  
>> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
>> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
>> +
>>         green_ = params.green;
>>         red_ = swapRedBlueGains_ ? params.blue : params.red;
>>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
>> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>>  
>>         metadata.planes()[0].bytesused = out.planes()[0].size();
>>  
>> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
>> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
>> +
>>         /* Measure before emitting signals */
>>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
>> -- 
>> 2.46.0
>>
Robert Mader Sept. 18, 2024, 10:26 a.m. UTC | #3
One thing I'd like to highlight again for the record: while the patch 
here reduces glitches considerably for me, I still occasionally see some 
when using GL clients like Snapshot.

Assuming that's not because of hardware/driver bugs (I see it on quite 
different platforms), that could be caused by us still waiting for 
implicit/explicit sync before reusing buffers.

 From https://docs.kernel.org/driver-api/dma-buf.html:

> The synchronization provided via DMA_BUF_IOCTL_SYNC only provides 
> cache coherency. It does not prevent other processes or devices from 
> accessing the memory at the same time. If synchronization with a GPU 
> or other device driver is required, it is the client’s responsibility 
> to wait for buffer to be ready for reading or writing before calling 
> this ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure 
> that follow-up work is not submitted to GPU or other device driver 
> until after this ioctl has been called with DMA_BUF_SYNC_END?
>
> If the driver or API with which the client is interacting uses 
> implicit synchronization, waiting for prior work to complete can be 
> done via poll() on the DMA buffer file descriptor. If the driver or 
> API requires explicit synchronization, the client may have to wait on 
> a sync_file or other synchronization primitive outside the scope of 
> the DMA buffer API.
>
I wonder if we should add a swISP TODO for this - or is opening a 
bugzilla issue for tracking enough?

P.S.: I'm not entirely sure if the udmabuf sync implementation really 
doesn't wait for implicit fences - but if it does, there's no guarantee 
that things stay that way.

On 16.09.24 18:12, Robert Mader wrote:
> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
> correct output. Not doing so currently results in occasional tearing
> and/or backlashes in GL/VK clients that use the buffers directly for
> rendering.
>
> An alternative approach to have the sync code in `MappedFrameBuffer` was
> considered but rejected for now, in order to allow clients more
> flexibility.
>
> Signed-off-by: Robert Mader <robert.mader@collabora.com>
>
> ---
>
> Changes in v4:
>   - Fixed errno handling
>   - Added sync flags to error message
>   - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
>
> Changes in v3:
>   - add syncBufferForCPU() helper function
>
> Changes in v2:
>   - sync input buffer as well
>   - update commit title accordingly
>   - small changes to the commit message
>   - linting fixes, should pass now
> ---
>   src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>   1 file changed, 30 insertions(+)
>
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> index 077f7f4b..96cf2c71 100644
> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -12,8 +12,11 @@
>   #include "debayer_cpu.h"
>   
>   #include <stdlib.h>
> +#include <sys/ioctl.h>
>   #include <time.h>
>   
> +#include <linux/dma-buf.h>
> +
>   #include <libcamera/formats.h>
>   
>   #include "libcamera/internal/bayer_format.h"
> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>   	}
>   }
>   
> +namespace {
> +
> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
> +{
> +	for (const FrameBuffer::Plane &plane : buffer->planes()) {
> +		const int fd = plane.fd.get();
> +		struct dma_buf_sync sync = { syncFlags };
> +		int ret;
> +
> +		ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
> +		if (ret < 0) {
> +			ret = errno;
> +			LOG(Debayer, Error)
> +				<< "Syncing buffer FD " << fd << " with flags "
> +				<< syncFlags << " failed: " << strerror(-ret);
> +		}
> +	}
> +}
> +
>   static inline int64_t timeDiff(timespec &after, timespec &before)
>   {
>   	return (after.tv_sec - before.tv_sec) * 1000000000LL +
>   	       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>   }
>   
> +} /* namespace */
> +
>   void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>   {
>   	timespec frameStartTime;
> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>   		clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>   	}
>   
> +	syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
> +	syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
> +
>   	green_ = params.green;
>   	red_ = swapRedBlueGains_ ? params.blue : params.red;
>   	blue_ = swapRedBlueGains_ ? params.red : params.blue;
> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>   
>   	metadata.planes()[0].bytesused = out.planes()[0].size();
>   
> +	syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
> +	syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
> +
>   	/* Measure before emitting signals */
>   	if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>   	    ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
Kieran Bingham Sept. 18, 2024, 10:56 a.m. UTC | #4
Quoting Milan Zamazal (2024-09-18 10:55:47)
> Kieran Bingham <kieran.bingham@ideasonboard.com> writes:
> 
> > Quoting Robert Mader (2024-09-16 17:12:31)
> >> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
> >> correct output. Not doing so currently results in occasional tearing
> >
> >> and/or backlashes in GL/VK clients that use the buffers directly for
> >> rendering.
> >> 
> >> An alternative approach to have the sync code in `MappedFrameBuffer` was
> >> considered but rejected for now, in order to allow clients more
> >> flexibility.
> >> 
> >> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> >> 
> >> ---
> >> 
> >> Changes in v4:
> >>  - Fixed errno handling
> >>  - Added sync flags to error message
> >>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
> >> 
> >> Changes in v3:
> >>  - add syncBufferForCPU() helper function
> >> 
> >> Changes in v2:
> >>  - sync input buffer as well
> >>  - update commit title accordingly
> >>  - small changes to the commit message
> >>  - linting fixes, should pass now
> >> ---
> >>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
> >>  1 file changed, 30 insertions(+)
> >> 
> >> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> >> index 077f7f4b..96cf2c71 100644
> >> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> >> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> >> @@ -12,8 +12,11 @@
> >>  #include "debayer_cpu.h"
> >>  
> >>  #include <stdlib.h>
> >> +#include <sys/ioctl.h>
> >>  #include <time.h>
> >>  
> >> +#include <linux/dma-buf.h>
> >> +
> >>  #include <libcamera/formats.h>
> >>  
> >>  #include "libcamera/internal/bayer_format.h"
> >> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
> >>         }
> >>  }
> >>  
> >> +namespace {
> >> +
> >> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
> >
> > Now these are in an annoymous namespace, they don't need to be static -
> > but that can be fixed while applying perhaps or ignored for now if no
> > further version is required otherwise.
> >
> > I'm happy to merge, but under software_isp/* I think we should have an
> > Acked-by/Reviewed-by by one of the SoftISP developers.
> >
> > Milan, Hans ? Are you happy with this patch? Is there any impact with
> > the Intel devices for instance?
> 
> I'm fine with the patch (with the exception below) and it works fine on
> Debix (with only ~0,5% performance penalty).  It would be nice if Hans
> tested it on Intel.
> 
> > --
> > Kieran
> >
> >
> >
> >
> >> +{
> >> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
> >> +               const int fd = plane.fd.get();
> >> +               struct dma_buf_sync sync = { syncFlags };
> >> +               int ret;
> >> +
> >> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
> >> +               if (ret < 0) {
> >> +                       ret = errno;
> >> +                       LOG(Debayer, Error)
> >> +                               << "Syncing buffer FD " << fd << " with flags "
> >> +                               << syncFlags << " failed: " << strerror(-ret);
> 
> I think `ret' and not `-ret' should be used here.  And I would drop
> `ret = errno' above and use errno here directly.  I'm not sure why
> Kieran suggested exactly this -- using strerror is right but changing
> the sign seems wrong (tested in my environment).

This is a pattern we've used throughout libcamera. It's not advised to
use errno directly in a stream operation ( << ) because other parts of
the stream or calls, or even the construction of the LOG() class could
do something that also sets errno.

So we take a copy of errno as soon as the error is detected.

Yes - I'm wrong on the -ret in that instance. Sorry - I got confused
because we would only return a negative error value - but in this
instance we are in a void function that doesn't return anything.
(there's no point I think).

But yes I inverted the value - sorry. As long as 'errno' is not directly
used in the LOG() << errno; statements it's fine.

--
Kieran

> 
> >> +               }
> >> +       }
> >> +}
> >> +
> >>  static inline int64_t timeDiff(timespec &after, timespec &before)
> >>  {
> >>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
> >>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
> >>  }
> >>  
> >> +} /* namespace */
> >> +
> >>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
> >>  {
> >>         timespec frameStartTime;
> >> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
> >>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
> >>         }
> >>  
> >> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
> >> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
> >> +
> >>         green_ = params.green;
> >>         red_ = swapRedBlueGains_ ? params.blue : params.red;
> >>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
> >> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
> >>  
> >>         metadata.planes()[0].bytesused = out.planes()[0].size();
> >>  
> >> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
> >> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
> >> +
> >>         /* Measure before emitting signals */
> >>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
> >>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
> >> -- 
> >> 2.46.0
> >>
>
Milan Zamazal Sept. 18, 2024, 11:16 a.m. UTC | #5
Kieran Bingham <kieran.bingham@ideasonboard.com> writes:

> Quoting Milan Zamazal (2024-09-18 10:55:47)
>> Kieran Bingham <kieran.bingham@ideasonboard.com> writes:
>> 
>
>> > Quoting Robert Mader (2024-09-16 17:12:31)
>> >> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
>> >> correct output. Not doing so currently results in occasional tearing
>> >
>> >> and/or backlashes in GL/VK clients that use the buffers directly for
>> >> rendering.
>> >> 
>> >> An alternative approach to have the sync code in `MappedFrameBuffer` was
>> >> considered but rejected for now, in order to allow clients more
>> >> flexibility.
>> >> 
>> >> Signed-off-by: Robert Mader <robert.mader@collabora.com>
>> >> 
>> >> ---
>> >> 
>> >> Changes in v4:
>> >>  - Fixed errno handling
>> >>  - Added sync flags to error message
>> >>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
>> >> 
>> >> Changes in v3:
>> >>  - add syncBufferForCPU() helper function
>> >> 
>> >> Changes in v2:
>> >>  - sync input buffer as well
>> >>  - update commit title accordingly
>> >>  - small changes to the commit message
>> >>  - linting fixes, should pass now
>> >> ---
>> >>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>> >>  1 file changed, 30 insertions(+)
>> >> 
>> >> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
>> >> index 077f7f4b..96cf2c71 100644
>> >> --- a/src/libcamera/software_isp/debayer_cpu.cpp
>> >> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
>> >> @@ -12,8 +12,11 @@
>> >>  #include "debayer_cpu.h"
>> >>  
>> >>  #include <stdlib.h>
>> >> +#include <sys/ioctl.h>
>> >>  #include <time.h>
>> >>  
>> >> +#include <linux/dma-buf.h>
>> >> +
>> >>  #include <libcamera/formats.h>
>> >>  
>> >>  #include "libcamera/internal/bayer_format.h"
>> >> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>> >>         }
>> >>  }
>> >>  
>> >> +namespace {
>> >> +
>> >> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
>> >
>> > Now these are in an annoymous namespace, they don't need to be static -
>> > but that can be fixed while applying perhaps or ignored for now if no
>> > further version is required otherwise.
>> >
>> > I'm happy to merge, but under software_isp/* I think we should have an
>> > Acked-by/Reviewed-by by one of the SoftISP developers.
>> >
>> > Milan, Hans ? Are you happy with this patch? Is there any impact with
>> > the Intel devices for instance?
>> 
>> I'm fine with the patch (with the exception below) and it works fine on
>> Debix (with only ~0,5% performance penalty).  It would be nice if Hans
>> tested it on Intel.
>> 
>> > --
>> > Kieran
>> >
>> >
>> >
>> >
>> >> +{
>> >> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
>> >> +               const int fd = plane.fd.get();
>> >> +               struct dma_buf_sync sync = { syncFlags };
>> >> +               int ret;
>> >> +
>> >> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
>> >> +               if (ret < 0) {
>> >> +                       ret = errno;
>> >> +                       LOG(Debayer, Error)
>> >> +                               << "Syncing buffer FD " << fd << " with flags "
>> >> +                               << syncFlags << " failed: " << strerror(-ret);
>> 
>> I think `ret' and not `-ret' should be used here.  And I would drop
>> `ret = errno' above and use errno here directly.  I'm not sure why
>> Kieran suggested exactly this -- using strerror is right but changing
>> the sign seems wrong (tested in my environment).
>
> This is a pattern we've used throughout libcamera. It's not advised to
> use errno directly in a stream operation ( << ) because other parts of
> the stream or calls, or even the construction of the LOG() class could
> do something that also sets errno.
>
> So we take a copy of errno as soon as the error is detected.

Ah, right, thanks for explanation.

> Yes - I'm wrong on the -ret in that instance. Sorry - I got confused
> because we would only return a negative error value - but in this
> instance we are in a void function that doesn't return anything.
> (there's no point I think).
>
> But yes I inverted the value - sorry. As long as 'errno' is not directly
> used in the LOG() << errno; statements it's fine.
>
> --
> Kieran
>
>> 
>> >> +               }
>> >> +       }
>> >> +}
>> >> +
>> >>  static inline int64_t timeDiff(timespec &after, timespec &before)
>> >>  {
>> >>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
>> >>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>> >>  }
>> >>  
>> >> +} /* namespace */
>> >> +
>> >>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>> >>  {
>> >>         timespec frameStartTime;
>> >> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>> >>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>> >>         }
>> >>  
>> >> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
>> >> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
>> >> +
>> >>         green_ = params.green;
>> >>         red_ = swapRedBlueGains_ ? params.blue : params.red;
>> >>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
>> >> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>> >>  
>> >>         metadata.planes()[0].bytesused = out.planes()[0].size();
>> >>  
>> >> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
>> >> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
>> >> +
>> >>         /* Measure before emitting signals */
>> >>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>> >>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
>> >> -- 
>> >> 2.46.0
>> >>
>>
Milan Zamazal Sept. 18, 2024, 11:26 a.m. UTC | #6
Robert Mader <robert.mader@collabora.com> writes:

> One thing I'd like to highlight again for the record: while the patch here reduces glitches considerably for
> me, I still occasionally see some when using GL clients like Snapshot.
>
> Assuming that's not because of hardware/driver bugs (I see it on quite different platforms), that could be
> caused by us still waiting for implicit/explicit sync before reusing buffers.
>
> From https://docs.kernel.org/driver-api/dma-buf.html:
>
>> The synchronization provided via DMA_BUF_IOCTL_SYNC only provides cache coherency. It does not prevent
>> other processes or devices from accessing the memory at the same time. If synchronization with a GPU or
>> other device driver is required, it is the client’s responsibility to wait for buffer to be ready for
>> reading or writing before calling this ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure
>> that follow-up work is not submitted to GPU or other device driver until after this ioctl has been called
>> with DMA_BUF_SYNC_END?
>>
>> If the driver or API with which the client is interacting uses implicit synchronization, waiting for prior
>> work to complete can be done via poll() on the DMA buffer file descriptor. If the driver or API requires
>> explicit synchronization, the client may have to wait on a sync_file or other synchronization primitive
>> outside the scope of the DMA buffer API.
>>
> I wonder if we should add a swISP TODO for this - or is opening a bugzilla issue for tracking enough?

I would prefer tracking it in bugzilla.  I consider the software ISP
TODO being a snapshot of the implementation requirements at the given
moment, which should be cleared ASAP and not used anymore.

> P.S.: I'm not entirely sure if the udmabuf sync implementation really doesn't wait for implicit fences - but
> if it does, there's no guarantee that things stay that way.
>
> On 16.09.24 18:12, Robert Mader wrote:
>> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
>> correct output. Not doing so currently results in occasional tearing
>> and/or backlashes in GL/VK clients that use the buffers directly for
>> rendering.
>>
>> An alternative approach to have the sync code in `MappedFrameBuffer` was
>> considered but rejected for now, in order to allow clients more
>> flexibility.
>>
>> Signed-off-by: Robert Mader <robert.mader@collabora.com>
>>
>> ---
>>
>> Changes in v4:
>>   - Fixed errno handling
>>   - Added sync flags to error message
>>   - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
>>
>> Changes in v3:
>>   - add syncBufferForCPU() helper function
>>
>> Changes in v2:
>>   - sync input buffer as well
>>   - update commit title accordingly
>>   - small changes to the commit message
>>   - linting fixes, should pass now
>> ---
>>   src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>>   1 file changed, 30 insertions(+)
>>
>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
>> index 077f7f4b..96cf2c71 100644
>> --- a/src/libcamera/software_isp/debayer_cpu.cpp
>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
>> @@ -12,8 +12,11 @@
>>   #include "debayer_cpu.h"
>>     #include <stdlib.h>
>> +#include <sys/ioctl.h>
>>   #include <time.h>
>>   +#include <linux/dma-buf.h>
>> +
>>   #include <libcamera/formats.h>
>>     #include "libcamera/internal/bayer_format.h"
>> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>>   	}
>>   }
>>   +namespace {
>> +
>> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
>> +{
>> +	for (const FrameBuffer::Plane &plane : buffer->planes()) {
>> +		const int fd = plane.fd.get();
>> +		struct dma_buf_sync sync = { syncFlags };
>> +		int ret;
>> +
>> +		ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
>> +		if (ret < 0) {
>> +			ret = errno;
>> +			LOG(Debayer, Error)
>> +				<< "Syncing buffer FD " << fd << " with flags "
>> +				<< syncFlags << " failed: " << strerror(-ret);
>> +		}
>> +	}
>> +}
>> +
>>   static inline int64_t timeDiff(timespec &after, timespec &before)
>>   {
>>   	return (after.tv_sec - before.tv_sec) * 1000000000LL +
>>   	       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>>   }
>>   +} /* namespace */
>> +
>>   void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>>   {
>>   	timespec frameStartTime;
>> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>>   		clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>>   	}
>>   +	syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
>> +	syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
>> +
>>   	green_ = params.green;
>>   	red_ = swapRedBlueGains_ ? params.blue : params.red;
>>   	blue_ = swapRedBlueGains_ ? params.red : params.blue;
>> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>>     	metadata.planes()[0].bytesused = out.planes()[0].size();
>>   +	syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
>> +	syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
>> +
>>   	/* Measure before emitting signals */
>>   	if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>>   	    ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
Laurent Pinchart Sept. 18, 2024, 3:40 p.m. UTC | #7
On Wed, Sep 18, 2024 at 01:26:41PM +0200, Milan Zamazal wrote:
> Robert Mader <robert.mader@collabora.com> writes:
> 
> > One thing I'd like to highlight again for the record: while the patch here reduces glitches considerably for
> > me, I still occasionally see some when using GL clients like Snapshot.
> >
> > Assuming that's not because of hardware/driver bugs (I see it on quite different platforms), that could be
> > caused by us still waiting for implicit/explicit sync before reusing buffers.
> >
> > From https://docs.kernel.org/driver-api/dma-buf.html:
> >
> >> The synchronization provided via DMA_BUF_IOCTL_SYNC only provides cache coherency. It does not prevent
> >> other processes or devices from accessing the memory at the same time. If synchronization with a GPU or
> >> other device driver is required, it is the client’s responsibility to wait for buffer to be ready for
> >> reading or writing before calling this ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure
> >> that follow-up work is not submitted to GPU or other device driver until after this ioctl has been called
> >> with DMA_BUF_SYNC_END?
> >>
> >> If the driver or API with which the client is interacting uses implicit synchronization, waiting for prior
> >> work to complete can be done via poll() on the DMA buffer file descriptor. If the driver or API requires
> >> explicit synchronization, the client may have to wait on a sync_file or other synchronization primitive
> >> outside the scope of the DMA buffer API.
> >>
> > I wonder if we should add a swISP TODO for this - or is opening a bugzilla issue for tracking enough?
> 
> I would prefer tracking it in bugzilla.  I consider the software ISP
> TODO being a snapshot of the implementation requirements at the given
> moment, which should be cleared ASAP and not used anymore.

Agreed.

> > P.S.: I'm not entirely sure if the udmabuf sync implementation really doesn't wait for implicit fences - but
> > if it does, there's no guarantee that things stay that way.
> >
> > On 16.09.24 18:12, Robert Mader wrote:
> >> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
> >> correct output. Not doing so currently results in occasional tearing
> >> and/or backlashes in GL/VK clients that use the buffers directly for
> >> rendering.
> >>
> >> An alternative approach to have the sync code in `MappedFrameBuffer` was
> >> considered but rejected for now, in order to allow clients more
> >> flexibility.
> >>
> >> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> >>
> >> ---
> >>
> >> Changes in v4:
> >>   - Fixed errno handling
> >>   - Added sync flags to error message
> >>   - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
> >>
> >> Changes in v3:
> >>   - add syncBufferForCPU() helper function
> >>
> >> Changes in v2:
> >>   - sync input buffer as well
> >>   - update commit title accordingly
> >>   - small changes to the commit message
> >>   - linting fixes, should pass now
> >> ---
> >>   src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
> >>   1 file changed, 30 insertions(+)
> >>
> >> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> >> index 077f7f4b..96cf2c71 100644
> >> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> >> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> >> @@ -12,8 +12,11 @@
> >>   #include "debayer_cpu.h"
> >>     #include <stdlib.h>
> >> +#include <sys/ioctl.h>
> >>   #include <time.h>
> >>   +#include <linux/dma-buf.h>
> >> +
> >>   #include <libcamera/formats.h>
> >>     #include "libcamera/internal/bayer_format.h"
> >> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
> >>   	}
> >>   }
> >>   +namespace {
> >> +
> >> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
> >> +{
> >> +	for (const FrameBuffer::Plane &plane : buffer->planes()) {
> >> +		const int fd = plane.fd.get();
> >> +		struct dma_buf_sync sync = { syncFlags };
> >> +		int ret;
> >> +
> >> +		ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
> >> +		if (ret < 0) {
> >> +			ret = errno;
> >> +			LOG(Debayer, Error)
> >> +				<< "Syncing buffer FD " << fd << " with flags "
> >> +				<< syncFlags << " failed: " << strerror(-ret);
> >> +		}
> >> +	}
> >> +}
> >> +
> >>   static inline int64_t timeDiff(timespec &after, timespec &before)
> >>   {
> >>   	return (after.tv_sec - before.tv_sec) * 1000000000LL +
> >>   	       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
> >>   }
> >>   +} /* namespace */
> >> +
> >>   void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
> >>   {
> >>   	timespec frameStartTime;
> >> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
> >>   		clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
> >>   	}
> >>   +	syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
> >> +	syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
> >> +
> >>   	green_ = params.green;
> >>   	red_ = swapRedBlueGains_ ? params.blue : params.red;
> >>   	blue_ = swapRedBlueGains_ ? params.red : params.blue;
> >> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
> >>     	metadata.planes()[0].bytesused = out.planes()[0].size();
> >>   +	syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
> >> +	syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
> >> +
> >>   	/* Measure before emitting signals */
> >>   	if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
> >>   	    ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
>
Hans de Goede Sept. 18, 2024, 7:03 p.m. UTC | #8
Hi,

On 18-Sep-24 11:55 AM, Milan Zamazal wrote:
> Kieran Bingham <kieran.bingham@ideasonboard.com> writes:
> 
>> Quoting Robert Mader (2024-09-16 17:12:31)
>>> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
>>> correct output. Not doing so currently results in occasional tearing
>>
>>> and/or backlashes in GL/VK clients that use the buffers directly for
>>> rendering.
>>>
>>> An alternative approach to have the sync code in `MappedFrameBuffer` was
>>> considered but rejected for now, in order to allow clients more
>>> flexibility.
>>>
>>> Signed-off-by: Robert Mader <robert.mader@collabora.com>
>>>
>>> ---
>>>
>>> Changes in v4:
>>>  - Fixed errno handling
>>>  - Added sync flags to error message
>>>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
>>>
>>> Changes in v3:
>>>  - add syncBufferForCPU() helper function
>>>
>>> Changes in v2:
>>>  - sync input buffer as well
>>>  - update commit title accordingly
>>>  - small changes to the commit message
>>>  - linting fixes, should pass now
>>> ---
>>>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>>>  1 file changed, 30 insertions(+)
>>>
>>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
>>> index 077f7f4b..96cf2c71 100644
>>> --- a/src/libcamera/software_isp/debayer_cpu.cpp
>>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
>>> @@ -12,8 +12,11 @@
>>>  #include "debayer_cpu.h"
>>>  
>>>  #include <stdlib.h>
>>> +#include <sys/ioctl.h>
>>>  #include <time.h>
>>>  
>>> +#include <linux/dma-buf.h>
>>> +
>>>  #include <libcamera/formats.h>
>>>  
>>>  #include "libcamera/internal/bayer_format.h"
>>> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>>>         }
>>>  }
>>>  
>>> +namespace {
>>> +
>>> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
>>
>> Now these are in an annoymous namespace, they don't need to be static -
>> but that can be fixed while applying perhaps or ignored for now if no
>> further version is required otherwise.
>>
>> I'm happy to merge, but under software_isp/* I think we should have an
>> Acked-by/Reviewed-by by one of the SoftISP developers.
>>
>> Milan, Hans ? Are you happy with this patch? Is there any impact with
>> the Intel devices for instance?
> 
> I'm fine with the patch (with the exception below) and it works fine on
> Debix (with only ~0,5% performance penalty).  It would be nice if Hans
> tested it on Intel.

I have tested this on a ThinkPad X1 Yoga gen 8 with IPU6 + ov2740
sensor running at 1920x1080. Things still work and the builtin
bench function shows an increase of the frame-processing time
(including the syncing) from ~6.1 ms/frame to ~6.3 ms/frame which
I consider an acceptable slowdown:

Tested-by: Hans de Goede <hdegoede@redhat.com> # IPU6 + ov2740

Regards,

Hans






> 
>> --
>> Kieran
>>
>>
>>
>>
>>> +{
>>> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
>>> +               const int fd = plane.fd.get();
>>> +               struct dma_buf_sync sync = { syncFlags };
>>> +               int ret;
>>> +
>>> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
>>> +               if (ret < 0) {
>>> +                       ret = errno;
>>> +                       LOG(Debayer, Error)
>>> +                               << "Syncing buffer FD " << fd << " with flags "
>>> +                               << syncFlags << " failed: " << strerror(-ret);
> 
> I think `ret' and not `-ret' should be used here.  And I would drop
> `ret = errno' above and use errno here directly.  I'm not sure why
> Kieran suggested exactly this -- using strerror is right but changing
> the sign seems wrong (tested in my environment).
> 
>>> +               }
>>> +       }
>>> +}
>>> +
>>>  static inline int64_t timeDiff(timespec &after, timespec &before)
>>>  {
>>>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
>>>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>>>  }
>>>  
>>> +} /* namespace */
>>> +
>>>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>>>  {
>>>         timespec frameStartTime;
>>> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>>>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>>>         }
>>>  
>>> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
>>> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
>>> +
>>>         green_ = params.green;
>>>         red_ = swapRedBlueGains_ ? params.blue : params.red;
>>>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
>>> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>>>  
>>>         metadata.planes()[0].bytesused = out.planes()[0].size();
>>>  
>>> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
>>> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
>>> +
>>>         /* Measure before emitting signals */
>>>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>>>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
>>> -- 
>>> 2.46.0
>>>
>
Kieran Bingham Sept. 19, 2024, 10:15 a.m. UTC | #9
Quoting Hans de Goede (2024-09-18 20:03:47)
> Hi,
> 
> On 18-Sep-24 11:55 AM, Milan Zamazal wrote:
> > Kieran Bingham <kieran.bingham@ideasonboard.com> writes:
> > 
> >> Quoting Robert Mader (2024-09-16 17:12:31)
> >>> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
> >>> correct output. Not doing so currently results in occasional tearing
> >>
> >>> and/or backlashes in GL/VK clients that use the buffers directly for
> >>> rendering.
> >>>
> >>> An alternative approach to have the sync code in `MappedFrameBuffer` was
> >>> considered but rejected for now, in order to allow clients more
> >>> flexibility.
> >>>
> >>> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> >>>
> >>> ---
> >>>
> >>> Changes in v4:
> >>>  - Fixed errno handling
> >>>  - Added sync flags to error message
> >>>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
> >>>
> >>> Changes in v3:
> >>>  - add syncBufferForCPU() helper function
> >>>
> >>> Changes in v2:
> >>>  - sync input buffer as well
> >>>  - update commit title accordingly
> >>>  - small changes to the commit message
> >>>  - linting fixes, should pass now
> >>> ---
> >>>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
> >>>  1 file changed, 30 insertions(+)
> >>>
> >>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> >>> index 077f7f4b..96cf2c71 100644
> >>> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> >>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> >>> @@ -12,8 +12,11 @@
> >>>  #include "debayer_cpu.h"
> >>>  
> >>>  #include <stdlib.h>
> >>> +#include <sys/ioctl.h>
> >>>  #include <time.h>
> >>>  
> >>> +#include <linux/dma-buf.h>
> >>> +
> >>>  #include <libcamera/formats.h>
> >>>  
> >>>  #include "libcamera/internal/bayer_format.h"
> >>> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
> >>>         }
> >>>  }
> >>>  
> >>> +namespace {
> >>> +
> >>> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
> >>
> >> Now these are in an annoymous namespace, they don't need to be static -
> >> but that can be fixed while applying perhaps or ignored for now if no
> >> further version is required otherwise.
> >>
> >> I'm happy to merge, but under software_isp/* I think we should have an
> >> Acked-by/Reviewed-by by one of the SoftISP developers.
> >>
> >> Milan, Hans ? Are you happy with this patch? Is there any impact with
> >> the Intel devices for instance?
> > 
> > I'm fine with the patch (with the exception below) and it works fine on
> > Debix (with only ~0,5% performance penalty).  It would be nice if Hans
> > tested it on Intel.
> 
> I have tested this on a ThinkPad X1 Yoga gen 8 with IPU6 + ov2740
> sensor running at 1920x1080. Things still work and the builtin
> bench function shows an increase of the frame-processing time
> (including the syncing) from ~6.1 ms/frame to ~6.3 ms/frame which
> I consider an acceptable slowdown:
> 
> Tested-by: Hans de Goede <hdegoede@redhat.com> # IPU6 + ov2740

Checking the stats is a great idea and I've just realised I can also do
this ;-)

On a Lenovo X13s with OV5675 2584x1944-ABGR8888 output pipeline:

Without the patch: 67446 (average of below)

[0:53:27.924908852] [12249]  INFO Debayer debayer_cpu.cpp:772 Processed 30 frames in 2026886us, 67562 us/frame
[0:54:04.343961479] [12316]  INFO Debayer debayer_cpu.cpp:772 Processed 30 frames in 2023919us, 67463 us/frame
[0:56:43.206420524] [13810]  INFO Debayer debayer_cpu.cpp:772 Processed 30 frames in 2021544us, 67384 us/frame
[1:03:39.259725236] [15813]  INFO Debayer debayer_cpu.cpp:772 Processed 30 frames in 2021270us, 67375 us/frame



With the patch: 67620 (average of below)

[0:59:29.918112330] [14793]  INFO Debayer debayer_cpu.cpp:802 Processed 30 frames in 2026329us, 67544 us/frame
[1:00:04.847115778] [14834]  INFO Debayer debayer_cpu.cpp:802 Processed 30 frames in 2033635us, 67787 us/frame
[1:00:33.564276945] [15161]  INFO Debayer debayer_cpu.cpp:802 Processed 30 frames in 2028140us, 67604 us/frame
[1:01:26.971334924] [15200]  INFO Debayer debayer_cpu.cpp:802 Processed 30 frames in 2026393us, 67546 us/frame


I think that's a ... 0.25% slowdown? I think I can live with that if
it's providing cache handling guarantees ;-)

Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com> # Lenovo X13s + OV5675

--
Kieran


> 
> Regards,
> 
> Hans
> 
> 
> 
> 
> 
> 
> > 
> >> --
> >> Kieran
> >>
> >>
> >>
> >>
> >>> +{
> >>> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
> >>> +               const int fd = plane.fd.get();
> >>> +               struct dma_buf_sync sync = { syncFlags };
> >>> +               int ret;
> >>> +
> >>> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
> >>> +               if (ret < 0) {
> >>> +                       ret = errno;
> >>> +                       LOG(Debayer, Error)
> >>> +                               << "Syncing buffer FD " << fd << " with flags "
> >>> +                               << syncFlags << " failed: " << strerror(-ret);
> > 
> > I think `ret' and not `-ret' should be used here.  And I would drop
> > `ret = errno' above and use errno here directly.  I'm not sure why
> > Kieran suggested exactly this -- using strerror is right but changing
> > the sign seems wrong (tested in my environment).
> > 
> >>> +               }
> >>> +       }
> >>> +}
> >>> +
> >>>  static inline int64_t timeDiff(timespec &after, timespec &before)
> >>>  {
> >>>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
> >>>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
> >>>  }
> >>>  
> >>> +} /* namespace */
> >>> +
> >>>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
> >>>  {
> >>>         timespec frameStartTime;
> >>> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
> >>>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
> >>>         }
> >>>  
> >>> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
> >>> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
> >>> +
> >>>         green_ = params.green;
> >>>         red_ = swapRedBlueGains_ ? params.blue : params.red;
> >>>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
> >>> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
> >>>  
> >>>         metadata.planes()[0].bytesused = out.planes()[0].size();
> >>>  
> >>> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
> >>> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
> >>> +
> >>>         /* Measure before emitting signals */
> >>>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
> >>>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
> >>> -- 
> >>> 2.46.0
> >>>
> > 
>
Kieran Bingham Sept. 20, 2024, 11:47 a.m. UTC | #10
Quoting Robert Mader (2024-09-16 17:12:31)
> Using `DMA_BUF_IOCTL_SYNC` is required for DMABUFs in order to ensure
> correct output. Not doing so currently results in occasional tearing
> and/or backlashes in GL/VK clients that use the buffers directly for
> rendering.
> 
> An alternative approach to have the sync code in `MappedFrameBuffer` was
> considered but rejected for now, in order to allow clients more
> flexibility.
> 
> Signed-off-by: Robert Mader <robert.mader@collabora.com>
> 
> ---
> 
> Changes in v4:
>  - Fixed errno handling
>  - Added sync flags to error message
>  - Wrapped syncBufferForCPU() and timeDiff() into an anonymous namespace
> 
> Changes in v3:
>  - add syncBufferForCPU() helper function
> 
> Changes in v2:
>  - sync input buffer as well
>  - update commit title accordingly
>  - small changes to the commit message
>  - linting fixes, should pass now
> ---
>  src/libcamera/software_isp/debayer_cpu.cpp | 30 ++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> index 077f7f4b..96cf2c71 100644
> --- a/src/libcamera/software_isp/debayer_cpu.cpp
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -12,8 +12,11 @@
>  #include "debayer_cpu.h"
>  
>  #include <stdlib.h>
> +#include <sys/ioctl.h>
>  #include <time.h>
>  
> +#include <linux/dma-buf.h>
> +
>  #include <libcamera/formats.h>
>  
>  #include "libcamera/internal/bayer_format.h"
> @@ -718,12 +721,33 @@ void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
>         }
>  }
>  
> +namespace {
> +
> +static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)

Doesn't need to be static.

> +{
> +       for (const FrameBuffer::Plane &plane : buffer->planes()) {
> +               const int fd = plane.fd.get();
> +               struct dma_buf_sync sync = { syncFlags };
> +               int ret;
> +
> +               ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
> +               if (ret < 0) {
> +                       ret = errno;
> +                       LOG(Debayer, Error)
> +                               << "Syncing buffer FD " << fd << " with flags "
> +                               << syncFlags << " failed: " << strerror(-ret);

As noted by Milan, my proposal was wrong here. This should be
strerror(ret).

> +               }
> +       }
> +}
> +
>  static inline int64_t timeDiff(timespec &after, timespec &before)

And we can remove this static at the same time.

With those ...

Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>

But I've already fixed those up locally so I'll send v5 which I think
can then be merged so no further action required.

--
Kieran

>  {
>         return (after.tv_sec - before.tv_sec) * 1000000000LL +
>                (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
>  }
>  
> +} /* namespace */
> +
>  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
>  {
>         timespec frameStartTime;
> @@ -733,6 +757,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>                 clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
>         }
>  
> +       syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
> +       syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
> +
>         green_ = params.green;
>         red_ = swapRedBlueGains_ ? params.blue : params.red;
>         blue_ = swapRedBlueGains_ ? params.red : params.blue;
> @@ -760,6 +787,9 @@ void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
>  
>         metadata.planes()[0].bytesused = out.planes()[0].size();
>  
> +       syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
> +       syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
> +
>         /* Measure before emitting signals */
>         if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
>             ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
> -- 
> 2.46.0
>

Patch
diff mbox series

diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
index 077f7f4b..96cf2c71 100644
--- a/src/libcamera/software_isp/debayer_cpu.cpp
+++ b/src/libcamera/software_isp/debayer_cpu.cpp
@@ -12,8 +12,11 @@ 
 #include "debayer_cpu.h"
 
 #include <stdlib.h>
+#include <sys/ioctl.h>
 #include <time.h>
 
+#include <linux/dma-buf.h>
+
 #include <libcamera/formats.h>
 
 #include "libcamera/internal/bayer_format.h"
@@ -718,12 +721,33 @@  void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
 	}
 }
 
+namespace {
+
+static void syncBufferForCPU(FrameBuffer *buffer, uint64_t syncFlags)
+{
+	for (const FrameBuffer::Plane &plane : buffer->planes()) {
+		const int fd = plane.fd.get();
+		struct dma_buf_sync sync = { syncFlags };
+		int ret;
+
+		ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
+		if (ret < 0) {
+			ret = errno;
+			LOG(Debayer, Error)
+				<< "Syncing buffer FD " << fd << " with flags "
+				<< syncFlags << " failed: " << strerror(-ret);
+		}
+	}
+}
+
 static inline int64_t timeDiff(timespec &after, timespec &before)
 {
 	return (after.tv_sec - before.tv_sec) * 1000000000LL +
 	       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
 }
 
+} /* namespace */
+
 void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
 {
 	timespec frameStartTime;
@@ -733,6 +757,9 @@  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
 		clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
 	}
 
+	syncBufferForCPU(input, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
+	syncBufferForCPU(output, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
+
 	green_ = params.green;
 	red_ = swapRedBlueGains_ ? params.blue : params.red;
 	blue_ = swapRedBlueGains_ ? params.red : params.blue;
@@ -760,6 +787,9 @@  void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams
 
 	metadata.planes()[0].bytesused = out.planes()[0].size();
 
+	syncBufferForCPU(output, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
+	syncBufferForCPU(input, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
+
 	/* Measure before emitting signals */
 	if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
 	    ++measuredFrames_ > DebayerCpu::kFramesToSkip) {