Message ID: 20220530142722.57618-12-tomi.valkeinen@ideasonboard.com
State: Accepted
Hi Tomi,

Thank you for the patch.

On Mon, May 30, 2022 at 05:27:17PM +0300, Tomi Valkeinen wrote:
> We always call CameraManager.read_event() and
> CameraManager.get_ready_requests(), so to simplify the use merge the
> read_event() into the get_ready_requests().
>
> This has the side effect that get_ready_requests() will now block if
> there is no event ready. If we ever need to call get_ready_requests() in
> a polling manner we will need a new function which behaves differently.
>
> However, afaics the only sensible way to manage the event loop is to use
> select/poll on the eventfd and then call get_ready_requests() once,
> which is the use case what the current merged function supports.
>
> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> ---
>  src/py/cam/cam.py            | 2 --
>  src/py/libcamera/py_main.cpp | 7 ++-----
>  test/py/unittests.py         | 4 ----
>  3 files changed, 2 insertions(+), 11 deletions(-)
>
> diff --git a/src/py/cam/cam.py b/src/py/cam/cam.py
> index bf8529d9..2ae89fa8 100755
> --- a/src/py/cam/cam.py
> +++ b/src/py/cam/cam.py
> @@ -243,8 +243,6 @@ class CaptureState:
>      # Called from renderer when there is a libcamera event
>      def event_handler(self):
>          try:
> -            self.cm.read_event()
> -
>              reqs = self.cm.get_ready_requests()
>
>              for req in reqs:
> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
> index fcf009f0..505cc3dc 100644
> --- a/src/py/libcamera/py_main.cpp
> +++ b/src/py/libcamera/py_main.cpp
> @@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
>              return gEventfd;
>          })
>
> -        .def("read_event", [](CameraManager &) {
> +        .def("get_ready_requests", [](CameraManager &) {
>              uint8_t buf[8];
>
> -            int ret = read(gEventfd, buf, 8);
> -            if (ret != 8)
> +            if (read(gEventfd, buf, 8) != 8)
>                  throw std::system_error(errno, std::generic_category());

We need a memory barrier here to prevent reordering. I'm quite sure
there's one somewhere due to the read() call and the lock below, but I
haven't been able to figure out the C++ rule that guarantees this. Does
anyone know ?

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

> -        })
>
> -        .def("get_ready_requests", [](CameraManager &) {
>              std::vector<Request *> v;
>
>              {
> diff --git a/test/py/unittests.py b/test/py/unittests.py
> index 33b35a0a..9adc4337 100755
> --- a/test/py/unittests.py
> +++ b/test/py/unittests.py
> @@ -210,8 +210,6 @@ class SimpleCaptureMethods(CameraTesterBase):
>          reqs = []
>
>          while True:
> -            cm.read_event()
> -
>              ready_reqs = cm.get_ready_requests()
>
>              reqs += ready_reqs
> @@ -283,8 +281,6 @@ class SimpleCaptureMethods(CameraTesterBase):
>          while running:
>              events = sel.select()
>              for key, _ in events:
> -                cm.read_event()
> -
>                  ready_reqs = cm.get_ready_requests()
>
>                  reqs += ready_reqs
On 30/05/2022 18:53, Laurent Pinchart wrote:
> Hi Tomi,
>
> Thank you for the patch.
>
> On Mon, May 30, 2022 at 05:27:17PM +0300, Tomi Valkeinen wrote:
>> We always call CameraManager.read_event() and
>> CameraManager.get_ready_requests(), so to simplify the use merge the
>> read_event() into the get_ready_requests().
>>
>> This has the side effect that get_ready_requests() will now block if
>> there is no event ready. If we ever need to call get_ready_requests() in
>> a polling manner we will need a new function which behaves differently.
>>
>> However, afaics the only sensible way to manage the event loop is to use
>> select/poll on the eventfd and then call get_ready_requests() once,
>> which is the use case what the current merged function supports.
>>
>> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
>> ---
>>  src/py/cam/cam.py            | 2 --
>>  src/py/libcamera/py_main.cpp | 7 ++-----
>>  test/py/unittests.py         | 4 ----
>>  3 files changed, 2 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/py/cam/cam.py b/src/py/cam/cam.py
>> index bf8529d9..2ae89fa8 100755
>> --- a/src/py/cam/cam.py
>> +++ b/src/py/cam/cam.py
>> @@ -243,8 +243,6 @@ class CaptureState:
>>      # Called from renderer when there is a libcamera event
>>      def event_handler(self):
>>          try:
>> -            self.cm.read_event()
>> -
>>              reqs = self.cm.get_ready_requests()
>>
>>              for req in reqs:
>> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
>> index fcf009f0..505cc3dc 100644
>> --- a/src/py/libcamera/py_main.cpp
>> +++ b/src/py/libcamera/py_main.cpp
>> @@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
>>              return gEventfd;
>>          })
>>
>> -        .def("read_event", [](CameraManager &) {
>> +        .def("get_ready_requests", [](CameraManager &) {
>>              uint8_t buf[8];
>>
>> -            int ret = read(gEventfd, buf, 8);
>> -            if (ret != 8)
>> +            if (read(gEventfd, buf, 8) != 8)
>>                  throw std::system_error(errno, std::generic_category());
>
> We need a memory barrier here to prevent reordering. I'm quite sure

We then need the same in handleRequestCompleted().

> there's one somewhere due to the read() call and the lock below, but I
> haven't been able to figure out the C++ rule that guarantees this. Does
> anyone know ?

https://en.cppreference.com/w/cpp/atomic/memory_order

So the mutex brings the normal acquire-release ordering. I would think a
syscall includes a barrier, but I can't find any details right away.

 Tomi
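[The acquire/release rule referred to above can be sketched in isolation. The following is an illustration only, with hypothetical names (listMutex, readyList, notify()); notify() is an empty stand-in for the eventfd write(), which in the real code sits outside the critical section.]

#include <mutex>
#include <vector>

static std::mutex listMutex;
static std::vector<int> readyList;

/* Stub standing in for write(gEventfd, ...); deliberately does nothing here. */
static void notify()
{
}

static void complete(int req)
{
	{
		/* lock() is an acquire: nothing below it may be observed before it. */
		std::lock_guard<std::mutex> guard(listMutex);
		readyList.push_back(req);
		/* unlock() is a release: nothing above it may be observed after it. */
	}

	/*
	 * notify() itself is not ordered against the unlock by these two rules
	 * alone: another thread may observe it before the push_back(), though
	 * never before the lock(). Whether that matters is the question this
	 * thread is discussing.
	 */
	notify();
}

int main()
{
	complete(1);
	return 0;
}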
On 30/05/2022 19:57, Tomi Valkeinen wrote:

>>> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
>>> index fcf009f0..505cc3dc 100644
>>> --- a/src/py/libcamera/py_main.cpp
>>> +++ b/src/py/libcamera/py_main.cpp
>>> @@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
>>>             return gEventfd;
>>>         })
>>> -       .def("read_event", [](CameraManager &) {
>>> +       .def("get_ready_requests", [](CameraManager &) {
>>>             uint8_t buf[8];
>>> -           int ret = read(gEventfd, buf, 8);
>>> -           if (ret != 8)
>>> +           if (read(gEventfd, buf, 8) != 8)
>>>                 throw std::system_error(errno, std::generic_category());
>>
>> We need a memory barrier here to prevent reordering. I'm quite sure
>
> We then need the same in handleRequestCompleted().
>
>> there's one somewhere due to the read() call and the lock below, but I
>> haven't been able to figure out the C++ rule that guarantees this. Does
>> anyone know ?
>
> https://en.cppreference.com/w/cpp/atomic/memory_order
>
> So the mutex brings the normal acquire-release ordering. I would think a
> syscall includes a barrier, but I can't find any details right away.

Perhaps something like this? I don't know if the fences are needed, but maybe
better safe than sorry.

diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
index 505cc3dc..b035a101 100644
--- a/src/py/libcamera/py_main.cpp
+++ b/src/py/libcamera/py_main.cpp
@@ -5,6 +5,7 @@
  * Python bindings
  */

+#include <atomic>
 #include <mutex>
 #include <stdexcept>
 #include <sys/eventfd.h>
@@ -116,6 +117,12 @@ static void handleRequestCompleted(Request *req)
         gReqList.push_back(req);
     }

+    /*
+     * Prevent the write below from possibly being reordered to happen
+     * before the push_back() above.
+     */
+    std::atomic_thread_fence(std::memory_order_acquire);
+
     uint64_t v = 1;
     size_t s = write(gEventfd, &v, 8);
     /*
@@ -219,6 +226,12 @@ PYBIND11_MODULE(_libcamera, m)
             if (read(gEventfd, buf, 8) != 8)
                 throw std::system_error(errno, std::generic_category());

+            /*
+             * Prevent the read above from possibly being reordered
+             * to happen after the swap() below.
+             */
+            std::atomic_thread_fence(std::memory_order_release);
+
             std::vector<Request *> v;

             {
On 31/05/2022 08:51, Tomi Valkeinen wrote:
> On 30/05/2022 19:57, Tomi Valkeinen wrote:
>
>>>> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
>>>> index fcf009f0..505cc3dc 100644
>>>> --- a/src/py/libcamera/py_main.cpp
>>>> +++ b/src/py/libcamera/py_main.cpp
>>>> @@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
>>>>             return gEventfd;
>>>>         })
>>>> -       .def("read_event", [](CameraManager &) {
>>>> +       .def("get_ready_requests", [](CameraManager &) {
>>>>             uint8_t buf[8];
>>>> -           int ret = read(gEventfd, buf, 8);
>>>> -           if (ret != 8)
>>>> +           if (read(gEventfd, buf, 8) != 8)
>>>>                 throw std::system_error(errno, std::generic_category());
>>>
>>> We need a memory barrier here to prevent reordering. I'm quite sure
>>
>> We then need the same in handleRequestCompleted().
>>
>>> there's one somewhere due to the read() call and the lock below, but I
>>> haven't been able to figure out the C++ rule that guarantees this. Does
>>> anyone know ?
>>
>> https://en.cppreference.com/w/cpp/atomic/memory_order
>>
>> So the mutex brings the normal acquire-release ordering. I would think
>> a syscall includes a barrier, but I can't find any details right away.
>
> Perhaps something like this? I don't know if the fences are needed, but
> maybe better safe than sorry.
>
> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
> index 505cc3dc..b035a101 100644
> --- a/src/py/libcamera/py_main.cpp
> +++ b/src/py/libcamera/py_main.cpp
> @@ -5,6 +5,7 @@
>   * Python bindings
>   */
>
> +#include <atomic>
>  #include <mutex>
>  #include <stdexcept>
>  #include <sys/eventfd.h>
> @@ -116,6 +117,12 @@ static void handleRequestCompleted(Request *req)
>          gReqList.push_back(req);
>      }
>
> +    /*
> +     * Prevent the write below from possibly being reordered to happen
> +     * before the push_back() above.
> +     */
> +    std::atomic_thread_fence(std::memory_order_acquire);
> +
>      uint64_t v = 1;
>      size_t s = write(gEventfd, &v, 8);
>      /*
> @@ -219,6 +226,12 @@ PYBIND11_MODULE(_libcamera, m)
>              if (read(gEventfd, buf, 8) != 8)
>                  throw std::system_error(errno, std::generic_category());
>
> +            /*
> +             * Prevent the read above from possibly being reordered
> +             * to happen after the swap() below.
> +             */
> +            std::atomic_thread_fence(std::memory_order_release);
> +
>              std::vector<Request *> v;
>
>              {

I think this could use std::atomic_signal_fence, but I'm pretty sure
there's no real life performance difference.

 Tomi
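[For reference on the two fences being compared: std::atomic_thread_fence orders memory accesses with respect to other threads and may emit a CPU barrier on weakly ordered machines, while std::atomic_signal_fence only constrains compiler reordering and formally synchronizes a thread with a signal handler running in that same thread. A minimal sketch of the two calls, not a complete synchronization pattern:]

#include <atomic>

int main()
{
	/*
	 * Orders surrounding memory accesses against other threads; on weakly
	 * ordered CPUs this can emit a barrier instruction.
	 */
	std::atomic_thread_fence(std::memory_order_release);

	/*
	 * Compiler-only: prevents the compiler from reordering accesses across
	 * this point, but emits no hardware barrier. It formally establishes
	 * ordering only with a signal handler executed in the same thread.
	 */
	std::atomic_signal_fence(std::memory_order_release);

	return 0;
}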
Hi Tomi,

On Thu, Jun 02, 2022 at 11:27:35AM +0300, Tomi Valkeinen wrote:
> On 31/05/2022 08:51, Tomi Valkeinen wrote:
> > On 30/05/2022 19:57, Tomi Valkeinen wrote:
> >
> >>>> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
> >>>> index fcf009f0..505cc3dc 100644
> >>>> --- a/src/py/libcamera/py_main.cpp
> >>>> +++ b/src/py/libcamera/py_main.cpp
> >>>> @@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
> >>>>             return gEventfd;
> >>>>         })
> >>>> -       .def("read_event", [](CameraManager &) {
> >>>> +       .def("get_ready_requests", [](CameraManager &) {
> >>>>             uint8_t buf[8];
> >>>> -           int ret = read(gEventfd, buf, 8);
> >>>> -           if (ret != 8)
> >>>> +           if (read(gEventfd, buf, 8) != 8)
> >>>>                 throw std::system_error(errno, std::generic_category());
> >>>
> >>> We need a memory barrier here to prevent reordering. I'm quite sure
> >>
> >> We then need the same in handleRequestCompleted().
> >>
> >>> there's one somewhere due to the read() call and the lock below, but I
> >>> haven't been able to figure out the C++ rule that guarantees this. Does
> >>> anyone know ?
> >>
> >> https://en.cppreference.com/w/cpp/atomic/memory_order
> >>
> >> So the mutex brings the normal acquire-release ordering. I would think
> >> a syscall includes a barrier, but I can't find any details right away.
> >
> > Perhaps something like this? I don't know if the fences are needed, but maybe
> > better safe than sorry.

Thanks for all the information. If I understand things correctly, the
mutex should be enough. See below (where I'm sure I'll say many stupid,
or at least incorrect, things).

> > diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
> > index 505cc3dc..b035a101 100644
> > --- a/src/py/libcamera/py_main.cpp
> > +++ b/src/py/libcamera/py_main.cpp
> > @@ -5,6 +5,7 @@
> >   * Python bindings
> >   */
> >
> > +#include <atomic>
> >  #include <mutex>
> >  #include <stdexcept>
> >  #include <sys/eventfd.h>
> > @@ -116,6 +117,12 @@ static void handleRequestCompleted(Request *req)

Adding some more context:

    {
        std::lock_guard guard(gReqlistMutex);
        ^---- [1]
> >         gReqList.push_back(req);
> >     }
    ^---- [2]

[1] locks the mutex, which is an acquire operation. It means that no
read or write in the same thread after the lock() can be reordered
before it.

[2] unlocks the mutex, which is a release operation. It means that no
read or write in the same thread before the unlock() can be reordered
after it.

On the reader side, we could thus observe the write() below before the
push_back, but never before the lock(). Effectively, what we could see
on the reader thread, assuming that the write() doesn't contain any
barrier (which is likely incorrect), is

    {
        std::lock_guard guard(gReqlistMutex);
        write(gEventfd, &v, 8);
        gReqList.push_back(req);
    }

> >
> > +    /*
> > +     * Prevent the write below from possibly being reordered to happen
> > +     * before the push_back() above.
> > +     */
> > +    std::atomic_thread_fence(std::memory_order_acquire);
> > +
> >      uint64_t v = 1;
> >      size_t s = write(gEventfd, &v, 8);
> >      /*
> > @@ -219,6 +226,12 @@ PYBIND11_MODULE(_libcamera, m)
> >              if (read(gEventfd, buf, 8) != 8)
> >                  throw std::system_error(errno, std::generic_category());
> >
> > +            /*
> > +             * Prevent the read above from possibly being reordered
> > +             * to happen after the swap() below.
> > +             */
> > +            std::atomic_thread_fence(std::memory_order_release);
> > +
> >              std::vector<Request *> v;
> >
> >              {
        std::lock_guard guard(gReqlistMutex);
        ^---- [3]
        swap(v, gReqList);
    }
    ^---- [4]

Similarly, [3] is an acquire operation and [4] is a release operation. What
could thus be observed by the writer is

    {
        std::lock_guard guard(gReqlistMutex);
        swap(v, gReqList);
        read(gEventfd, buf, 8);
    }

If get_ready_requests() gets called because select() wakes up due to an
event on the eventfd, it means the write() has been performed. While it
may be seen by the reader before the .push_back(), the lock will ensure
sequential consistency between the reader and the writer: the reader
won't be able to acquire the lock before the writer releases it, so
swap() will not occur before the .push_back() is observed.

If get_ready_requests() gets called in blocking mode, read() would
block, so there's no way it can get reordered after the lock() by the
CPU as the latter won't be executed yet. I'm pretty sure the "as-if"
rule (https://en.cppreference.com/w/cpp/language/as_if) prevents the
compiler from reordering the read() after the lock() :-) This should
thus also be safe.

Tl;dr:

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

Now the real question is: how many mistakes or inaccuracies does the
above explanation contain ? :-)

> I think this could use std::atomic_signal_fence, but I'm pretty sure
> there's no real life performance difference.
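[The conclusion above can be condensed into a small, self-contained sketch of the pattern under discussion. The names efd, readyList, complete() and drain() are hypothetical stand-ins for gEventfd, gReqList, handleRequestCompleted() and the merged get_ready_requests(); the sketch relies only on the mutex's acquire/release semantics and uses no explicit fences.]

#include <cstdint>
#include <cstdio>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
#include <sys/eventfd.h>
#include <unistd.h>

static int efd;
static std::mutex listMutex;
static std::vector<int> readyList;

/* Writer side, analogous to handleRequestCompleted(). */
static void complete(int req)
{
	{
		std::lock_guard<std::mutex> guard(listMutex);	/* acquire */
		readyList.push_back(req);
	}						/* release */

	/* Wake the reader; ordering of the list accesses comes from the lock. */
	uint64_t v = 1;
	if (write(efd, &v, 8) != 8)
		perror("write");
}

/* Reader side, analogous to the merged get_ready_requests(). */
static std::vector<int> drain()
{
	/* Blocks until the eventfd counter is non-zero, then resets it. */
	uint8_t buf[8];
	if (read(efd, buf, 8) != 8)
		perror("read");

	std::vector<int> v;
	{
		std::lock_guard<std::mutex> guard(listMutex);	/* acquire */
		std::swap(v, readyList);
	}						/* release */
	return v;
}

int main()
{
	efd = eventfd(0, 0);

	std::thread writer([] {
		for (int i = 0; i < 5; i++)
			complete(i);
	});

	size_t seen = 0;
	while (seen < 5) {
		for (int req : drain()) {
			printf("request %d ready\n", req);
			seen++;
		}
	}

	writer.join();
	close(efd);
	return 0;
}

[In this sketch the eventfd only wakes the reader; visibility and ordering of the list contents come from acquiring the same mutex the writer released, which is the point of the analysis above.]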
On 04/06/2022 02:16, Laurent Pinchart wrote:
> Hi Tomi,
>
> On Thu, Jun 02, 2022 at 11:27:35AM +0300, Tomi Valkeinen wrote:
>> On 31/05/2022 08:51, Tomi Valkeinen wrote:
>>> On 30/05/2022 19:57, Tomi Valkeinen wrote:
>>>
>>>>>> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
>>>>>> index fcf009f0..505cc3dc 100644
>>>>>> --- a/src/py/libcamera/py_main.cpp
>>>>>> +++ b/src/py/libcamera/py_main.cpp
>>>>>> @@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
>>>>>>             return gEventfd;
>>>>>>         })
>>>>>> -       .def("read_event", [](CameraManager &) {
>>>>>> +       .def("get_ready_requests", [](CameraManager &) {
>>>>>>             uint8_t buf[8];
>>>>>> -           int ret = read(gEventfd, buf, 8);
>>>>>> -           if (ret != 8)
>>>>>> +           if (read(gEventfd, buf, 8) != 8)
>>>>>>                 throw std::system_error(errno, std::generic_category());
>>>>>
>>>>> We need a memory barrier here to prevent reordering. I'm quite sure
>>>>
>>>> We then need the same in handleRequestCompleted().
>>>>
>>>>> there's one somewhere due to the read() call and the lock below, but I
>>>>> haven't been able to figure out the C++ rule that guarantees this. Does
>>>>> anyone know ?
>>>>
>>>> https://en.cppreference.com/w/cpp/atomic/memory_order
>>>>
>>>> So the mutex brings the normal acquire-release ordering. I would think
>>>> a syscall includes a barrier, but I can't find any details right away.
>>>
>>> Perhaps something like this? I don't know if the fences are needed, but maybe
>>> better safe than sorry.
>
> Thanks for all the information. If I understand things correctly, the
> mutex should be enough. See below (where I'm sure I'll say many stupid,
> or at least incorrect, things).
>
>>> diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
>>> index 505cc3dc..b035a101 100644
>>> --- a/src/py/libcamera/py_main.cpp
>>> +++ b/src/py/libcamera/py_main.cpp
>>> @@ -5,6 +5,7 @@
>>>   * Python bindings
>>>   */
>>>
>>> +#include <atomic>
>>>  #include <mutex>
>>>  #include <stdexcept>
>>>  #include <sys/eventfd.h>
>>> @@ -116,6 +117,12 @@ static void handleRequestCompleted(Request *req)
>
> Adding some more context:
>
>     {
>         std::lock_guard guard(gReqlistMutex);
>         ^---- [1]
>>>         gReqList.push_back(req);
>>>     }
>     ^---- [2]
>
> [1] locks the mutex, which is an acquire operation. It means that no
> read or write in the same thread after the lock() can be reordered
> before it.
>
> [2] unlocks the mutex, which is a release operation. It means that no
> read or write in the same thread before the unlock() can be reordered
> after it.
>
> On the reader side, we could thus observe the write() below before the
> push_back, but never before the lock(). Effectively, what we could see
> on the reader thread, assuming that the write() doesn't contain any
> barrier (which is likely incorrect), is
>
>     {
>         std::lock_guard guard(gReqlistMutex);
>         write(gEventfd, &v, 8);
>         gReqList.push_back(req);
>     }
>
>>>
>>> +    /*
>>> +     * Prevent the write below from possibly being reordered to happen
>>> +     * before the push_back() above.
>>> +     */
>>> +    std::atomic_thread_fence(std::memory_order_acquire);
>>> +
>>>      uint64_t v = 1;
>>>      size_t s = write(gEventfd, &v, 8);
>>>      /*
>>> @@ -219,6 +226,12 @@ PYBIND11_MODULE(_libcamera, m)
>>>              if (read(gEventfd, buf, 8) != 8)
>>>                  throw std::system_error(errno, std::generic_category());
>>>
>>> +            /*
>>> +             * Prevent the read above from possibly being reordered
>>> +             * to happen after the swap() below.
>>> +             */
>>> +            std::atomic_thread_fence(std::memory_order_release);
>>> +
>>>              std::vector<Request *> v;
>>>
>>>              {
>         std::lock_guard guard(gReqlistMutex);
>         ^---- [3]
>         swap(v, gReqList);
>     }
>     ^---- [4]
>
> Similarly, [3] is an acquire operation and [4] is a release operation. What
> could thus be observed by the writer is
>
>     {
>         std::lock_guard guard(gReqlistMutex);
>         swap(v, gReqList);
>         read(gEventfd, buf, 8);
>     }
>
> If get_ready_requests() gets called because select() wakes up due to an
> event on the eventfd, it means the write() has been performed. While it
> may be seen by the reader before the .push_back(), the lock will ensure
> sequential consistency between the reader and the writer: the reader
> won't be able to acquire the lock before the writer releases it, so
> swap() will not occur before the .push_back() is observed.
>
> If get_ready_requests() gets called in blocking mode, read() would
> block, so there's no way it can get reordered after the lock() by the
> CPU as the latter won't be executed yet. I'm pretty sure the "as-if"
> rule (https://en.cppreference.com/w/cpp/language/as_if) prevents the
> compiler from reordering the read() after the lock() :-) This should
> thus also be safe.

I agree. I also studied all possible ordering combinations, and I cannot
find a case which wouldn't work (without any fences).

I also find it difficult to believe the read/write could happen inside
the locks, although I still don't know why. Consider two threads, each of
which runs a function which does lock(); access-shared-variable; unlock().
If you then add a read() or write() call to one of those functions, and
the call blocks for a long time, and that call is moved to happen inside
the lock, it would cause the other thread to be blocked too. I don't
think I would ever figure out that kind of bug if it happened. But why
would it not happen?

Words of wisdom: Single threaded programming is hard. Multi-threaded
programming is impossible.

> Tl;dr:
>
> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
>
> Now the real question is: how many mistakes or inaccuracies does the
> above explanation contain ? :-)

In ten years, we'll happen to encounter this mail via some random
googling, and, oh man, the shame of writing all this nonsense when we
were young and ignorant...

 Tomi
diff --git a/src/py/cam/cam.py b/src/py/cam/cam.py
index bf8529d9..2ae89fa8 100755
--- a/src/py/cam/cam.py
+++ b/src/py/cam/cam.py
@@ -243,8 +243,6 @@ class CaptureState:
     # Called from renderer when there is a libcamera event
     def event_handler(self):
         try:
-            self.cm.read_event()
-
             reqs = self.cm.get_ready_requests()

             for req in reqs:
diff --git a/src/py/libcamera/py_main.cpp b/src/py/libcamera/py_main.cpp
index fcf009f0..505cc3dc 100644
--- a/src/py/libcamera/py_main.cpp
+++ b/src/py/libcamera/py_main.cpp
@@ -213,15 +213,12 @@ PYBIND11_MODULE(_libcamera, m)
             return gEventfd;
         })

-        .def("read_event", [](CameraManager &) {
+        .def("get_ready_requests", [](CameraManager &) {
             uint8_t buf[8];

-            int ret = read(gEventfd, buf, 8);
-            if (ret != 8)
+            if (read(gEventfd, buf, 8) != 8)
                 throw std::system_error(errno, std::generic_category());
-        })

-        .def("get_ready_requests", [](CameraManager &) {
             std::vector<Request *> v;

             {
diff --git a/test/py/unittests.py b/test/py/unittests.py
index 33b35a0a..9adc4337 100755
--- a/test/py/unittests.py
+++ b/test/py/unittests.py
@@ -210,8 +210,6 @@ class SimpleCaptureMethods(CameraTesterBase):
         reqs = []

         while True:
-            cm.read_event()
-
             ready_reqs = cm.get_ready_requests()

             reqs += ready_reqs
@@ -283,8 +281,6 @@ class SimpleCaptureMethods(CameraTesterBase):
         while running:
             events = sel.select()
             for key, _ in events:
-                cm.read_event()
-
                 ready_reqs = cm.get_ready_requests()

                 reqs += ready_reqs
We always call CameraManager.read_event() and
CameraManager.get_ready_requests(), so to simplify the use merge the
read_event() into the get_ready_requests().

This has the side effect that get_ready_requests() will now block if
there is no event ready. If we ever need to call get_ready_requests() in
a polling manner we will need a new function which behaves differently.

However, afaics the only sensible way to manage the event loop is to use
select/poll on the eventfd and then call get_ready_requests() once,
which is the use case the current merged function supports.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 src/py/cam/cam.py            | 2 --
 src/py/libcamera/py_main.cpp | 7 ++-----
 test/py/unittests.py         | 4 ----
 3 files changed, 2 insertions(+), 11 deletions(-)
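[A rough, single-iteration sketch of the event-loop shape the commit message describes, at the plain eventfd level rather than through the bindings; the fd and the "collect requests" step are hypothetical. The poll() guarantees the subsequent blocking read() will not block.]

#include <cstdint>
#include <cstdio>
#include <poll.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main()
{
	int efd = eventfd(0, 0);

	/* Pretend a request completed: the writer side bumps the counter. */
	uint64_t one = 1;
	if (write(efd, &one, 8) != 8)
		perror("write");

	/* Event-loop step: poll first, so the following read() cannot block. */
	struct pollfd pfd = { efd, POLLIN, 0 };
	if (poll(&pfd, 1, -1) < 0)
		perror("poll");

	uint64_t count;
	if (read(efd, &count, 8) == 8)
		printf("eventfd signalled %llu time(s); now collect the ready requests\n",
		       (unsigned long long)count);

	/*
	 * In the Python bindings, the read() above and the collection of the
	 * completed requests are what the merged get_ready_requests() does in
	 * one call.
	 */

	close(efd);
	return 0;
}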