| Message ID | 20260213-kbingham-quantizers-v7-3-1626b9aaabf1@ideasonboard.com |
|---|---|
| State | Accepted |
| Headers | show |
| Series |
|
| Related | show |
Quoting Kieran Bingham (2026-02-14 01:57:42) > The fixedToFloatingPoint does not support unsigned Q types, and > incorrectly sign-extends all values which have the top most bit set in > the quantized values. > > Fix this by ensuring that only signed types perform sign extension, and > simplify the calculation for unsigned types. > > Convert the storage of the test cases to signed types to correctly > represent their intended purpose, to prevent test failures. > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> Reviewed-by: Paul Elder <paul.elder@ideasonboard.com> > > --- > > v6: > - Use T{1} << F instead of 1 << F > > v7: > - Fix checkstyle issues > --- > src/ipa/libipa/fixedpoint.h | 3 +++ > test/ipa/libipa/fixedpoint.cpp | 6 +++--- > 2 files changed, 6 insertions(+), 3 deletions(-) > > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > index 709cf50f0fcd..aeb9bce3269b 100644 > --- a/src/ipa/libipa/fixedpoint.h > +++ b/src/ipa/libipa/fixedpoint.h > @@ -49,6 +49,9 @@ constexpr R fixedToFloatingPoint(T number) > static_assert(sizeof(int) >= sizeof(T)); > static_assert(I + F <= sizeof(T) * 8); > > + if constexpr (std::is_unsigned_v<T>) > + return static_cast<R>(number) / static_cast<R>(T{ 1 } << F); > + > /* > * Recreate the upper bits in case of a negative number by shifting the sign > * bit from the fixed point to the first bit of the unsigned and then right shifting > diff --git a/test/ipa/libipa/fixedpoint.cpp b/test/ipa/libipa/fixedpoint.cpp > index 99eb662ddf4e..4b017e86a74f 100644 > --- a/test/ipa/libipa/fixedpoint.cpp > +++ b/test/ipa/libipa/fixedpoint.cpp > @@ -68,7 +68,7 @@ protected: > * The second 7.992 test is to test that unused bits don't > * affect the result. > */ > - std::map<double, uint16_t> testCases = { > + std::map<double, int16_t> testCases = { > { 7.992, 0x3ff }, > { 0.2, 0x01a }, > { -0.2, 0x7e6 }, > @@ -81,14 +81,14 @@ protected: > > int ret; > for (const auto &testCase : testCases) { > - ret = testSingleFixedPoint<4, 7, uint16_t>(testCase.first, > + ret = testSingleFixedPoint<4, 7, int16_t>(testCase.first, > testCase.second); > if (ret != TestPass) > return ret; > } > > /* Special case with a superfluous one in the unused bits */ > - ret = testFixedToFloat<4, 7, uint16_t, double>(0xbff, 7.992); > + ret = testFixedToFloat<4, 7, int16_t, double>(0xbff, 7.992); > if (ret != TestPass) > return ret; > > > -- > 2.52.0 >
On Fri, Feb 13, 2026 at 04:57:42PM +0000, Kieran Bingham wrote: > The fixedToFloatingPoint does not support unsigned Q types, and > incorrectly sign-extends all values which have the top most bit set in > the quantized values. > > Fix this by ensuring that only signed types perform sign extension, and > simplify the calculation for unsigned types. > > Convert the storage of the test cases to signed types to correctly > represent their intended purpose, to prevent test failures. > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> > --- > > v6: > - Use T{1} << F instead of 1 << F > > v7: > - Fix checkstyle issues > --- > src/ipa/libipa/fixedpoint.h | 3 +++ > test/ipa/libipa/fixedpoint.cpp | 6 +++--- > 2 files changed, 6 insertions(+), 3 deletions(-) > > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > index 709cf50f0fcd..aeb9bce3269b 100644 > --- a/src/ipa/libipa/fixedpoint.h > +++ b/src/ipa/libipa/fixedpoint.h > @@ -49,6 +49,9 @@ constexpr R fixedToFloatingPoint(T number) > static_assert(sizeof(int) >= sizeof(T)); > static_assert(I + F <= sizeof(T) * 8); > > + if constexpr (std::is_unsigned_v<T>) > + return static_cast<R>(number) / static_cast<R>(T{ 1 } << F); > + > /* > * Recreate the upper bits in case of a negative number by shifting the sign > * bit from the fixed point to the first bit of the unsigned and then right shifting > diff --git a/test/ipa/libipa/fixedpoint.cpp b/test/ipa/libipa/fixedpoint.cpp > index 99eb662ddf4e..4b017e86a74f 100644 > --- a/test/ipa/libipa/fixedpoint.cpp > +++ b/test/ipa/libipa/fixedpoint.cpp > @@ -68,7 +68,7 @@ protected: > * The second 7.992 test is to test that unused bits don't > * affect the result. > */ > - std::map<double, uint16_t> testCases = { > + std::map<double, int16_t> testCases = { > { 7.992, 0x3ff }, > { 0.2, 0x01a }, > { -0.2, 0x7e6 }, > @@ -81,14 +81,14 @@ protected: > > int ret; > for (const auto &testCase : testCases) { > - ret = testSingleFixedPoint<4, 7, uint16_t>(testCase.first, > + ret = testSingleFixedPoint<4, 7, int16_t>(testCase.first, > testCase.second); > if (ret != TestPass) > return ret; > } > > /* Special case with a superfluous one in the unused bits */ > - ret = testFixedToFloat<4, 7, uint16_t, double>(0xbff, 7.992); > + ret = testFixedToFloat<4, 7, int16_t, double>(0xbff, 7.992); > if (ret != TestPass) > return ret; >
diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h index 709cf50f0fcd..aeb9bce3269b 100644 --- a/src/ipa/libipa/fixedpoint.h +++ b/src/ipa/libipa/fixedpoint.h @@ -49,6 +49,9 @@ constexpr R fixedToFloatingPoint(T number) static_assert(sizeof(int) >= sizeof(T)); static_assert(I + F <= sizeof(T) * 8); + if constexpr (std::is_unsigned_v<T>) + return static_cast<R>(number) / static_cast<R>(T{ 1 } << F); + /* * Recreate the upper bits in case of a negative number by shifting the sign * bit from the fixed point to the first bit of the unsigned and then right shifting diff --git a/test/ipa/libipa/fixedpoint.cpp b/test/ipa/libipa/fixedpoint.cpp index 99eb662ddf4e..4b017e86a74f 100644 --- a/test/ipa/libipa/fixedpoint.cpp +++ b/test/ipa/libipa/fixedpoint.cpp @@ -68,7 +68,7 @@ protected: * The second 7.992 test is to test that unused bits don't * affect the result. */ - std::map<double, uint16_t> testCases = { + std::map<double, int16_t> testCases = { { 7.992, 0x3ff }, { 0.2, 0x01a }, { -0.2, 0x7e6 }, @@ -81,14 +81,14 @@ protected: int ret; for (const auto &testCase : testCases) { - ret = testSingleFixedPoint<4, 7, uint16_t>(testCase.first, + ret = testSingleFixedPoint<4, 7, int16_t>(testCase.first, testCase.second); if (ret != TestPass) return ret; } /* Special case with a superfluous one in the unused bits */ - ret = testFixedToFloat<4, 7, uint16_t, double>(0xbff, 7.992); + ret = testFixedToFloat<4, 7, int16_t, double>(0xbff, 7.992); if (ret != TestPass) return ret;