Message ID | 20241012124926.1049458-2-chenghaoyang@chromium.org |
---|---|
State | Superseded |
Headers | show |
Series |
|
Related | show |
Hi Harvey, Han-Lin, Thank you for the patch. On Sat, Oct 12, 2024 at 12:47:57PM +0000, Harvey Yang wrote: > From: Han-Lin Chen <hanlinchen@chromium.org> > > Add method to set thread affinity to Thread class. > > Signed-off-by: Han-Lin Chen <hanlinchen@chromium.org> > Co-developed-by: Harvey Yang <chenghaoyang@chromium.org> > Signed-off-by: Harvey Yang <chenghaoyang@chromium.org> > --- > include/libcamera/base/thread.h | 6 +++++ > src/libcamera/base/thread.cpp | 47 +++++++++++++++++++++++++++++++++ > 2 files changed, 53 insertions(+) > > diff --git a/include/libcamera/base/thread.h b/include/libcamera/base/thread.h > index 4f33de63d..c74fe102b 100644 > --- a/include/libcamera/base/thread.h > +++ b/include/libcamera/base/thread.h > @@ -35,6 +35,8 @@ public: > void exit(int code = 0); > bool wait(utils::duration duration = utils::duration::max()); > > + void setThreadAffinity(const std::vector<int> &cpus); > + > bool isRunning(); > > Signal<> finished; > @@ -54,6 +56,8 @@ private: > void startThread(); > void finishThread(); > > + void setThreadAffinityInternal(); > + > void postMessage(std::unique_ptr<Message> msg, Object *receiver); > void removeMessages(Object *receiver); > > @@ -67,6 +71,8 @@ private: > > std::thread thread_; > ThreadData *data_; > + > + std::vector<int> cpus_; You can move this to ThreadData so it won't affect the API. > }; > > } /* namespace libcamera */ > diff --git a/src/libcamera/base/thread.cpp b/src/libcamera/base/thread.cpp > index 8735670b8..de5423eda 100644 > --- a/src/libcamera/base/thread.cpp > +++ b/src/libcamera/base/thread.cpp > @@ -254,6 +254,8 @@ void Thread::start() > data_->exit_.store(false, std::memory_order_relaxed); > > thread_ = std::thread(&Thread::startThread, this); > + > + setThreadAffinityInternal(); > } > > void Thread::startThread() > @@ -410,6 +412,51 @@ bool Thread::wait(utils::duration duration) > return hasFinished; > } > > +/** > + * \brief Set the CPU affinity mask of the thread > + * \param[in] cpus The list of CPU indices that the thread is set affinity to As there's error handling in the function, you should describe what happens in case of errors. > + */ > +void Thread::setThreadAffinity(const std::vector<int> &cpus) Make it a span so the caller can also use an array instead of a vector. > +{ > + cpus_.clear(); > + > + const unsigned int num_cpus = std::thread::hardware_concurrency(); > + for (const int &cpu : cpus) { > + if (cpus_.size() == CPU_SETSIZE) { > + LOG(Thread, Error) << "cpus_ already contains " << CPU_SETSIZE > + << " cpus. Ignoring the rest."; > + break; > + } > + > + if (cpu >= static_cast<int>(num_cpus) || cpu < 0) { Why is the argument to the function a vector of int if entries can't be negative ? Can't it be a vector of unsigned int instead ? You'll be able to drop the cast here and drop the second check. > + LOG(Thread, Error) << "Ignore an invalid cpu index: " << cpu; > + continue; > + } > + > + cpus_.push_back(cpu); > + } > + > + MutexLocker locker(data_->mutex_); > + if (data_->running_) > + setThreadAffinityInternal(); > +} > + > +void Thread::setThreadAffinityInternal() > +{ > + if (cpus_.empty()) > + return; > + > + auto handle = thread_.native_handle(); > + > + cpu_set_t cpuset; > + CPU_ZERO(&cpuset); > + > + for (auto cpu : cpus_) > + CPU_SET(cpu, &cpuset); Can't you compute the cpu_set_t and store it in the ThreadData class, instead of storing a vector ? This can be done without holding the lock. > + > + pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset); sizeof(cpu_set_t) > +} > + > /** > * \brief Check if the thread is running > *
Hi Laurent, On Sun, Oct 13, 2024 at 11:58 PM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote: > > Hi Harvey, Han-Lin, > > Thank you for the patch. > > On Sat, Oct 12, 2024 at 12:47:57PM +0000, Harvey Yang wrote: > > From: Han-Lin Chen <hanlinchen@chromium.org> > > > > Add method to set thread affinity to Thread class. > > > > Signed-off-by: Han-Lin Chen <hanlinchen@chromium.org> > > Co-developed-by: Harvey Yang <chenghaoyang@chromium.org> > > Signed-off-by: Harvey Yang <chenghaoyang@chromium.org> > > --- > > include/libcamera/base/thread.h | 6 +++++ > > src/libcamera/base/thread.cpp | 47 +++++++++++++++++++++++++++++++++ > > 2 files changed, 53 insertions(+) > > > > diff --git a/include/libcamera/base/thread.h b/include/libcamera/base/thread.h > > index 4f33de63d..c74fe102b 100644 > > --- a/include/libcamera/base/thread.h > > +++ b/include/libcamera/base/thread.h > > @@ -35,6 +35,8 @@ public: > > void exit(int code = 0); > > bool wait(utils::duration duration = utils::duration::max()); > > > > + void setThreadAffinity(const std::vector<int> &cpus); > > + > > bool isRunning(); > > > > Signal<> finished; > > @@ -54,6 +56,8 @@ private: > > void startThread(); > > void finishThread(); > > > > + void setThreadAffinityInternal(); > > + > > void postMessage(std::unique_ptr<Message> msg, Object *receiver); > > void removeMessages(Object *receiver); > > > > @@ -67,6 +71,8 @@ private: > > > > std::thread thread_; > > ThreadData *data_; > > + > > + std::vector<int> cpus_; > > You can move this to ThreadData so it won't affect the API. Got it. Done. > > > }; > > > > } /* namespace libcamera */ > > diff --git a/src/libcamera/base/thread.cpp b/src/libcamera/base/thread.cpp > > index 8735670b8..de5423eda 100644 > > --- a/src/libcamera/base/thread.cpp > > +++ b/src/libcamera/base/thread.cpp > > @@ -254,6 +254,8 @@ void Thread::start() > > data_->exit_.store(false, std::memory_order_relaxed); > > > > thread_ = std::thread(&Thread::startThread, this); > > + > > + setThreadAffinityInternal(); > > } > > > > void Thread::startThread() > > @@ -410,6 +412,51 @@ bool Thread::wait(utils::duration duration) > > return hasFinished; > > } > > > > +/** > > + * \brief Set the CPU affinity mask of the thread > > + * \param[in] cpus The list of CPU indices that the thread is set affinity to > > As there's error handling in the function, you should describe what > happens in case of errors. Added description. > > > + */ > > +void Thread::setThreadAffinity(const std::vector<int> &cpus) > > Make it a span so the caller can also use an array instead of a vector. Done. > > > +{ > > + cpus_.clear(); > > + > > + const unsigned int num_cpus = std::thread::hardware_concurrency(); > > + for (const int &cpu : cpus) { > > + if (cpus_.size() == CPU_SETSIZE) { > > + LOG(Thread, Error) << "cpus_ already contains " << CPU_SETSIZE > > + << " cpus. Ignoring the rest."; > > + break; > > + } > > + > > + if (cpu >= static_cast<int>(num_cpus) || cpu < 0) { > > Why is the argument to the function a vector of int if entries can't be > negative ? Can't it be a vector of unsigned int instead ? You'll be able > to drop the cast here and drop the second check. Right, I thought CPU_SET only takes int, while I was wrong. > > > + LOG(Thread, Error) << "Ignore an invalid cpu index: " << cpu; > > + continue; > > + } > > + > > + cpus_.push_back(cpu); > > + } > > + > > + MutexLocker locker(data_->mutex_); > > + if (data_->running_) > > + setThreadAffinityInternal(); > > +} > > + > > +void Thread::setThreadAffinityInternal() > > +{ > > + if (cpus_.empty()) > > + return; > > + > > + auto handle = thread_.native_handle(); > > + > > + cpu_set_t cpuset; > > + CPU_ZERO(&cpuset); > > + > > + for (auto cpu : cpus_) > > + CPU_SET(cpu, &cpuset); > > Can't you compute the cpu_set_t and store it in the ThreadData class, > instead of storing a vector ? This can be done without holding the lock. Done, while I still think that it should only be updated when holding the lock, to avoid race conditions. > > > + > > + pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset); > > sizeof(cpu_set_t) Sorry, I don't get it: I think I already used `sizeof(cpu_set_t)`. > > > +} > > + > > /** > > * \brief Check if the thread is running > > * > > -- > Regards, > > Laurent Pinchart
On Mon, Oct 14, 2024 at 03:00:09PM +0800, Cheng-Hao Yang wrote: > On Sun, Oct 13, 2024 at 11:58 PM Laurent Pinchart wrote: > > On Sat, Oct 12, 2024 at 12:47:57PM +0000, Harvey Yang wrote: > > > From: Han-Lin Chen <hanlinchen@chromium.org> > > > > > > Add method to set thread affinity to Thread class. > > > > > > Signed-off-by: Han-Lin Chen <hanlinchen@chromium.org> > > > Co-developed-by: Harvey Yang <chenghaoyang@chromium.org> > > > Signed-off-by: Harvey Yang <chenghaoyang@chromium.org> > > > --- > > > include/libcamera/base/thread.h | 6 +++++ > > > src/libcamera/base/thread.cpp | 47 +++++++++++++++++++++++++++++++++ > > > 2 files changed, 53 insertions(+) > > > > > > diff --git a/include/libcamera/base/thread.h b/include/libcamera/base/thread.h > > > index 4f33de63d..c74fe102b 100644 > > > --- a/include/libcamera/base/thread.h > > > +++ b/include/libcamera/base/thread.h > > > @@ -35,6 +35,8 @@ public: > > > void exit(int code = 0); > > > bool wait(utils::duration duration = utils::duration::max()); > > > > > > + void setThreadAffinity(const std::vector<int> &cpus); > > > + > > > bool isRunning(); > > > > > > Signal<> finished; > > > @@ -54,6 +56,8 @@ private: > > > void startThread(); > > > void finishThread(); > > > > > > + void setThreadAffinityInternal(); > > > + > > > void postMessage(std::unique_ptr<Message> msg, Object *receiver); > > > void removeMessages(Object *receiver); > > > > > > @@ -67,6 +71,8 @@ private: > > > > > > std::thread thread_; > > > ThreadData *data_; > > > + > > > + std::vector<int> cpus_; > > > > You can move this to ThreadData so it won't affect the API. > > Got it. Done. > > > > }; > > > > > > } /* namespace libcamera */ > > > diff --git a/src/libcamera/base/thread.cpp b/src/libcamera/base/thread.cpp > > > index 8735670b8..de5423eda 100644 > > > --- a/src/libcamera/base/thread.cpp > > > +++ b/src/libcamera/base/thread.cpp > > > @@ -254,6 +254,8 @@ void Thread::start() > > > data_->exit_.store(false, std::memory_order_relaxed); > > > > > > thread_ = std::thread(&Thread::startThread, this); > > > + > > > + setThreadAffinityInternal(); > > > } > > > > > > void Thread::startThread() > > > @@ -410,6 +412,51 @@ bool Thread::wait(utils::duration duration) > > > return hasFinished; > > > } > > > > > > +/** > > > + * \brief Set the CPU affinity mask of the thread > > > + * \param[in] cpus The list of CPU indices that the thread is set affinity to > > > > As there's error handling in the function, you should describe what > > happens in case of errors. > > Added description. > > > > + */ > > > +void Thread::setThreadAffinity(const std::vector<int> &cpus) > > > > Make it a span so the caller can also use an array instead of a vector. > > Done. > > > > +{ > > > + cpus_.clear(); > > > + > > > + const unsigned int num_cpus = std::thread::hardware_concurrency(); > > > + for (const int &cpu : cpus) { > > > + if (cpus_.size() == CPU_SETSIZE) { > > > + LOG(Thread, Error) << "cpus_ already contains " << CPU_SETSIZE > > > + << " cpus. Ignoring the rest."; > > > + break; > > > + } > > > + > > > + if (cpu >= static_cast<int>(num_cpus) || cpu < 0) { > > > > Why is the argument to the function a vector of int if entries can't be > > negative ? Can't it be a vector of unsigned int instead ? You'll be able > > to drop the cast here and drop the second check. > > Right, I thought CPU_SET only takes int, while I was wrong. > > > > + LOG(Thread, Error) << "Ignore an invalid cpu index: " << cpu; > > > + continue; > > > + } > > > + > > > + cpus_.push_back(cpu); > > > + } > > > + > > > + MutexLocker locker(data_->mutex_); > > > + if (data_->running_) > > > + setThreadAffinityInternal(); > > > +} > > > + > > > +void Thread::setThreadAffinityInternal() > > > +{ > > > + if (cpus_.empty()) > > > + return; > > > + > > > + auto handle = thread_.native_handle(); > > > + > > > + cpu_set_t cpuset; > > > + CPU_ZERO(&cpuset); > > > + > > > + for (auto cpu : cpus_) > > > + CPU_SET(cpu, &cpuset); > > > > Can't you compute the cpu_set_t and store it in the ThreadData class, > > instead of storing a vector ? This can be done without holding the lock. > > Done, while I still think that it should only be updated when holding the > lock, to avoid race conditions. True. We could compute a local cpuset variable and then copy it to cpuset_, but that's probably overkill. In practice I expect the vector to contain a small number of elements. > > > + > > > + pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset); > > > > sizeof(cpu_set_t) > > Sorry, I don't get it: I think I already used `sizeof(cpu_set_t)`. Sorry, I meant sizeof(cpuset). > > > +} > > > + > > > /** > > > * \brief Check if the thread is running > > > *
Hi Laurent, On Tue, Oct 15, 2024 at 4:39 AM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote: > > On Mon, Oct 14, 2024 at 03:00:09PM +0800, Cheng-Hao Yang wrote: > > On Sun, Oct 13, 2024 at 11:58 PM Laurent Pinchart wrote: > > > On Sat, Oct 12, 2024 at 12:47:57PM +0000, Harvey Yang wrote: > > > > From: Han-Lin Chen <hanlinchen@chromium.org> > > > > > > > > Add method to set thread affinity to Thread class. > > > > > > > > Signed-off-by: Han-Lin Chen <hanlinchen@chromium.org> > > > > Co-developed-by: Harvey Yang <chenghaoyang@chromium.org> > > > > Signed-off-by: Harvey Yang <chenghaoyang@chromium.org> > > > > --- > > > > include/libcamera/base/thread.h | 6 +++++ > > > > src/libcamera/base/thread.cpp | 47 +++++++++++++++++++++++++++++++++ > > > > 2 files changed, 53 insertions(+) > > > > > > > > diff --git a/include/libcamera/base/thread.h b/include/libcamera/base/thread.h > > > > index 4f33de63d..c74fe102b 100644 > > > > --- a/include/libcamera/base/thread.h > > > > +++ b/include/libcamera/base/thread.h > > > > @@ -35,6 +35,8 @@ public: > > > > void exit(int code = 0); > > > > bool wait(utils::duration duration = utils::duration::max()); > > > > > > > > + void setThreadAffinity(const std::vector<int> &cpus); > > > > + > > > > bool isRunning(); > > > > > > > > Signal<> finished; > > > > @@ -54,6 +56,8 @@ private: > > > > void startThread(); > > > > void finishThread(); > > > > > > > > + void setThreadAffinityInternal(); > > > > + > > > > void postMessage(std::unique_ptr<Message> msg, Object *receiver); > > > > void removeMessages(Object *receiver); > > > > > > > > @@ -67,6 +71,8 @@ private: > > > > > > > > std::thread thread_; > > > > ThreadData *data_; > > > > + > > > > + std::vector<int> cpus_; > > > > > > You can move this to ThreadData so it won't affect the API. > > > > Got it. Done. > > > > > > }; > > > > > > > > } /* namespace libcamera */ > > > > diff --git a/src/libcamera/base/thread.cpp b/src/libcamera/base/thread.cpp > > > > index 8735670b8..de5423eda 100644 > > > > --- a/src/libcamera/base/thread.cpp > > > > +++ b/src/libcamera/base/thread.cpp > > > > @@ -254,6 +254,8 @@ void Thread::start() > > > > data_->exit_.store(false, std::memory_order_relaxed); > > > > > > > > thread_ = std::thread(&Thread::startThread, this); > > > > + > > > > + setThreadAffinityInternal(); > > > > } > > > > > > > > void Thread::startThread() > > > > @@ -410,6 +412,51 @@ bool Thread::wait(utils::duration duration) > > > > return hasFinished; > > > > } > > > > > > > > +/** > > > > + * \brief Set the CPU affinity mask of the thread > > > > + * \param[in] cpus The list of CPU indices that the thread is set affinity to > > > > > > As there's error handling in the function, you should describe what > > > happens in case of errors. > > > > Added description. > > > > > > + */ > > > > +void Thread::setThreadAffinity(const std::vector<int> &cpus) > > > > > > Make it a span so the caller can also use an array instead of a vector. > > > > Done. > > > > > > +{ > > > > + cpus_.clear(); > > > > + > > > > + const unsigned int num_cpus = std::thread::hardware_concurrency(); > > > > + for (const int &cpu : cpus) { > > > > + if (cpus_.size() == CPU_SETSIZE) { > > > > + LOG(Thread, Error) << "cpus_ already contains " << CPU_SETSIZE > > > > + << " cpus. Ignoring the rest."; > > > > + break; > > > > + } > > > > + > > > > + if (cpu >= static_cast<int>(num_cpus) || cpu < 0) { > > > > > > Why is the argument to the function a vector of int if entries can't be > > > negative ? Can't it be a vector of unsigned int instead ? You'll be able > > > to drop the cast here and drop the second check. > > > > Right, I thought CPU_SET only takes int, while I was wrong. > > > > > > + LOG(Thread, Error) << "Ignore an invalid cpu index: " << cpu; > > > > + continue; > > > > + } > > > > + > > > > + cpus_.push_back(cpu); > > > > + } > > > > + > > > > + MutexLocker locker(data_->mutex_); > > > > + if (data_->running_) > > > > + setThreadAffinityInternal(); > > > > +} > > > > + > > > > +void Thread::setThreadAffinityInternal() > > > > +{ > > > > + if (cpus_.empty()) > > > > + return; > > > > + > > > > + auto handle = thread_.native_handle(); > > > > + > > > > + cpu_set_t cpuset; > > > > + CPU_ZERO(&cpuset); > > > > + > > > > + for (auto cpu : cpus_) > > > > + CPU_SET(cpu, &cpuset); > > > > > > Can't you compute the cpu_set_t and store it in the ThreadData class, > > > instead of storing a vector ? This can be done without holding the lock. > > > > Done, while I still think that it should only be updated when holding the > > lock, to avoid race conditions. > > True. We could compute a local cpuset variable and then copy it to > cpuset_, but that's probably overkill. In practice I expect the vector > to contain a small number of elements. Yeah I considered that, while I found that CPU_COPY is not in the standard library. Let's keep it as is for now. > > > > > + > > > > + pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset); > > > > > > sizeof(cpu_set_t) > > > > Sorry, I don't get it: I think I already used `sizeof(cpu_set_t)`. > > Sorry, I meant sizeof(cpuset). Done BR, Harvey > > > > > +} > > > > + > > > > /** > > > > * \brief Check if the thread is running > > > > * > > -- > Regards, > > Laurent Pinchart
diff --git a/include/libcamera/base/thread.h b/include/libcamera/base/thread.h index 4f33de63d..c74fe102b 100644 --- a/include/libcamera/base/thread.h +++ b/include/libcamera/base/thread.h @@ -35,6 +35,8 @@ public: void exit(int code = 0); bool wait(utils::duration duration = utils::duration::max()); + void setThreadAffinity(const std::vector<int> &cpus); + bool isRunning(); Signal<> finished; @@ -54,6 +56,8 @@ private: void startThread(); void finishThread(); + void setThreadAffinityInternal(); + void postMessage(std::unique_ptr<Message> msg, Object *receiver); void removeMessages(Object *receiver); @@ -67,6 +71,8 @@ private: std::thread thread_; ThreadData *data_; + + std::vector<int> cpus_; }; } /* namespace libcamera */ diff --git a/src/libcamera/base/thread.cpp b/src/libcamera/base/thread.cpp index 8735670b8..de5423eda 100644 --- a/src/libcamera/base/thread.cpp +++ b/src/libcamera/base/thread.cpp @@ -254,6 +254,8 @@ void Thread::start() data_->exit_.store(false, std::memory_order_relaxed); thread_ = std::thread(&Thread::startThread, this); + + setThreadAffinityInternal(); } void Thread::startThread() @@ -410,6 +412,51 @@ bool Thread::wait(utils::duration duration) return hasFinished; } +/** + * \brief Set the CPU affinity mask of the thread + * \param[in] cpus The list of CPU indices that the thread is set affinity to + */ +void Thread::setThreadAffinity(const std::vector<int> &cpus) +{ + cpus_.clear(); + + const unsigned int num_cpus = std::thread::hardware_concurrency(); + for (const int &cpu : cpus) { + if (cpus_.size() == CPU_SETSIZE) { + LOG(Thread, Error) << "cpus_ already contains " << CPU_SETSIZE + << " cpus. Ignoring the rest."; + break; + } + + if (cpu >= static_cast<int>(num_cpus) || cpu < 0) { + LOG(Thread, Error) << "Ignore an invalid cpu index: " << cpu; + continue; + } + + cpus_.push_back(cpu); + } + + MutexLocker locker(data_->mutex_); + if (data_->running_) + setThreadAffinityInternal(); +} + +void Thread::setThreadAffinityInternal() +{ + if (cpus_.empty()) + return; + + auto handle = thread_.native_handle(); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + + for (auto cpu : cpus_) + CPU_SET(cpu, &cpuset); + + pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset); +} + /** * \brief Check if the thread is running *