Message ID | 20210121101549.134574-2-paul.elder@ideasonboard.com |
---|---|
State | Superseded |
Delegated to: | Paul Elder |
Headers | show |
Series |
|
Related | show |
Hi Paul, Thank you for the patch. On Thu, Jan 21, 2021 at 07:15:41PM +0900, Paul Elder wrote: > GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16. > Expand setString to take an encoding when the field type is undefined. > Update callers accordingly. > > Signed-off-by: Paul Elder <paul.elder@ideasonboard.com> > > --- > Changes in v2: > - moved from utils into exif > - support no-encoding > --- > src/android/jpeg/exif.cpp | 107 +++++++++++++++++++++++++++++++------- > src/android/jpeg/exif.h | 12 ++++- > 2 files changed, 99 insertions(+), 20 deletions(-) > > diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp > index 33b3fa7f..cff366a4 100644 > --- a/src/android/jpeg/exif.cpp > +++ b/src/android/jpeg/exif.cpp > @@ -7,6 +7,9 @@ > > #include "exif.h" > > +#include <map> > +#include <uchar.h> > + > #include "libcamera/internal/log.h" > #include "libcamera/internal/utils.h" > > @@ -64,7 +67,7 @@ Exif::Exif() > exif_data_set_byte_order(data_, order_); > > setString(EXIF_IFD_EXIF, EXIF_TAG_EXIF_VERSION, > - EXIF_FORMAT_UNDEFINED, "0231"); > + EXIF_FORMAT_UNDEFINED, NoEncoding, "0231"); > > /* Create the mandatory EXIF fields with default data. */ > exif_data_fix(data_); > @@ -178,10 +181,18 @@ void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item) > exif_entry_unref(entry); > } > > -void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item) > +static const std::map<Exif::StringEncoding, std::vector<uint8_t>> StringEncodingCodes = { static const std::map<Exif::StringEncoding, std::array<uint8_t, 8>> StringEncodingCodes would be more efficient (if it doesn't cause too many issues in the code below). And s/StringEncodingCodes/stringEncodingCodes/ > + { Exif::ASCII, { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } }, > + { Exif::JIS, { 0x4a, 0x49, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00 } }, > + { Exif::Unicode, { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } }, > + { Exif::Undefined, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, Should we drop JIS and Undefined ? It would be different if we were parsing EXIF data, for as an EXIF generator, I doubt we'll ever generate a string in any of those encodings. > +}; > + > +void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, > + StringEncoding enc, const std::string &item) s/enc/encoding/ ? > { > std::string ascii; > - size_t length; > + size_t length = 0; > const char *str; > > if (format == EXIF_FORMAT_ASCII) { > @@ -190,14 +201,44 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str > > /* Pad 1 extra byte to null-terminate the ASCII string. */ > length = ascii.length() + 1; > - } else { > - str = item.c_str(); > - > - /* > - * Strings stored in different formats (EXIF_FORMAT_UNDEFINED) > - * are not null-terminated. > - */ > - length = item.length(); > + } else if (format == EXIF_FORMAT_UNDEFINED) { Any reason to replace else with else if ? If you keep a plain else, you won't have to initialize length to 0. > + std::vector<uint8_t> buf; > + const std::vector<uint8_t> &constbuf = buf; I don't think you need constbuf. You can move the encoding lookup out of the switch: auto encodingString = stringEncodingCodes.find(enc); if (encodingString != stringEncodingCodes.end()) buf = { encodingString->second.begin(), encodingString->second.end() }; (This handles the conversion from std::array to std::vector) > + std::u16string u16str; > + > + switch (enc) { > + case Unicode: > + buf = StringEncodingCodes.find(enc)->second; > + u16str = utf8ToUtf16(item); > + > + /* Little-endian */ Should this depend on the endianness of the EXIF ? > + buf.resize(8 + u16str.size() * 2); > + for (size_t i = 0; i < u16str.size(); i++) { for (char16_t c : u16str) { > + buf[8 + 2 * i] = (u16str[i] & 0xff); > + buf[8 + 2 * i + 1] = ((u16str[i] >> 8) & 0xff); > + } > + > + str = reinterpret_cast<const char *>(constbuf.data()); > + length = buf.size(); > + > + break; > + /* \todo Support JIS */ > + case JIS: > + case ASCII: > + case Undefined: > + buf = StringEncodingCodes.find(enc)->second; > + [[fallthrough]]; case NoEncoding: and you should be able to drop the default case. > + default: > + buf.insert(buf.end(), item.begin(), item.end()); > + str = reinterpret_cast<const char *>(constbuf.data()); > + > + /* > + * Strings stored in different formats (EXIF_FORMAT_UNDEFINED) > + * are not null-terminated. > + */ > + length = buf.size(); > + break; > + } Once you exit this scope, buf will be destroyed, and str becomes an invalid pointer. You need to declare buf in the top level scope. > } > > ExifEntry *entry = createEntry(ifd, tag, format, length, length); > @@ -210,12 +251,12 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str > > void Exif::setMake(const std::string &make) > { > - setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, make); > + setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, NoEncoding, make); > } > > void Exif::setModel(const std::string &model) > { > - setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, model); > + setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, NoEncoding, model); > } > > void Exif::setSize(const Size &size) > @@ -233,9 +274,9 @@ void Exif::setTimestamp(time_t timestamp) > strftime(str, sizeof(str), "%Y:%m:%d %H:%M:%S", &tm); > std::string ts(str); > > - setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, ts); > - setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, ts); > - setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, ts); > + setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, NoEncoding, ts); > + setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, NoEncoding, ts); > + setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, NoEncoding, ts); > > /* Query and set timezone information if available. */ > int r = strftime(str, sizeof(str), "%z", &tm); > @@ -244,13 +285,13 @@ void Exif::setTimestamp(time_t timestamp) > tz.insert(3, 1, ':'); > setString(EXIF_IFD_EXIF, > static_cast<ExifTag>(_ExifTag::OFFSET_TIME), > - EXIF_FORMAT_ASCII, tz); > + EXIF_FORMAT_ASCII, NoEncoding, tz); > setString(EXIF_IFD_EXIF, > static_cast<ExifTag>(_ExifTag::OFFSET_TIME_ORIGINAL), > - EXIF_FORMAT_ASCII, tz); > + EXIF_FORMAT_ASCII, NoEncoding, tz); > setString(EXIF_IFD_EXIF, > static_cast<ExifTag>(_ExifTag::OFFSET_TIME_DIGITIZED), > - EXIF_FORMAT_ASCII, tz); > + EXIF_FORMAT_ASCII, NoEncoding, tz); > } > } > > @@ -290,6 +331,34 @@ void Exif::setThumbnail(Span<const unsigned char> thumbnail, > setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression); > } > > +/** > + * \brief Convert UTF-8 string to UTF-16 string > + * \param[in] str String to convert > + * > + * \return \a str in UTF-16 > + */ > +std::u16string Exif::utf8ToUtf16(const std::string &str) > +{ > + std::mbstate_t state{}; This should be mbstate_t as you're including uchar.h. > + char16_t c16; > + const char *ptr = str.data(); > + const char *end = ptr + str.size(); > + > + std::u16string ret; > + while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) { > + if (rc == static_cast<size_t>(-2) || > + rc == static_cast<size_t>(-1)) > + break; > + > + ret.push_back(c16); > + > + if (rc > 0) > + ptr += rc; > + } > + > + return ret; > +} > + > [[nodiscard]] int Exif::generate() > { > if (exifData_) { > diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h > index 5cab4559..db98ba63 100644 > --- a/src/android/jpeg/exif.h > +++ b/src/android/jpeg/exif.h > @@ -26,6 +26,14 @@ public: > JPEG = 6, > }; > > + enum StringEncoding { > + NoEncoding = 0, > + ASCII = 1, > + JIS = 2, > + Unicode = 3, > + Undefined = 4, > + }; > + > void setMake(const std::string &make); > void setModel(const std::string &model); > > @@ -46,9 +54,11 @@ private: > void setShort(ExifIfd ifd, ExifTag tag, uint16_t item); > void setLong(ExifIfd ifd, ExifTag tag, uint32_t item); > void setString(ExifIfd ifd, ExifTag tag, ExifFormat format, > - const std::string &item); > + StringEncoding enc, const std::string &item); Would it make sense to move the encoding parameter last, to default it to NoEncoding ? That way most of the callers (and in particular the ones that pass EXIF_FORMAT_ASCII for format) wouldn't need to set enc to NoEncoding explicitly. > void setRational(ExifIfd ifd, ExifTag tag, ExifRational item); > > + std::u16string utf8ToUtf16(const std::string &str); > + > bool valid_; > > ExifData *data_;
diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp index 33b3fa7f..cff366a4 100644 --- a/src/android/jpeg/exif.cpp +++ b/src/android/jpeg/exif.cpp @@ -7,6 +7,9 @@ #include "exif.h" +#include <map> +#include <uchar.h> + #include "libcamera/internal/log.h" #include "libcamera/internal/utils.h" @@ -64,7 +67,7 @@ Exif::Exif() exif_data_set_byte_order(data_, order_); setString(EXIF_IFD_EXIF, EXIF_TAG_EXIF_VERSION, - EXIF_FORMAT_UNDEFINED, "0231"); + EXIF_FORMAT_UNDEFINED, NoEncoding, "0231"); /* Create the mandatory EXIF fields with default data. */ exif_data_fix(data_); @@ -178,10 +181,18 @@ void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item) exif_entry_unref(entry); } -void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item) +static const std::map<Exif::StringEncoding, std::vector<uint8_t>> StringEncodingCodes = { + { Exif::ASCII, { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } }, + { Exif::JIS, { 0x4a, 0x49, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + { Exif::Unicode, { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } }, + { Exif::Undefined, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, +}; + +void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, + StringEncoding enc, const std::string &item) { std::string ascii; - size_t length; + size_t length = 0; const char *str; if (format == EXIF_FORMAT_ASCII) { @@ -190,14 +201,44 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str /* Pad 1 extra byte to null-terminate the ASCII string. */ length = ascii.length() + 1; - } else { - str = item.c_str(); - - /* - * Strings stored in different formats (EXIF_FORMAT_UNDEFINED) - * are not null-terminated. - */ - length = item.length(); + } else if (format == EXIF_FORMAT_UNDEFINED) { + std::vector<uint8_t> buf; + const std::vector<uint8_t> &constbuf = buf; + std::u16string u16str; + + switch (enc) { + case Unicode: + buf = StringEncodingCodes.find(enc)->second; + u16str = utf8ToUtf16(item); + + /* Little-endian */ + buf.resize(8 + u16str.size() * 2); + for (size_t i = 0; i < u16str.size(); i++) { + buf[8 + 2 * i] = (u16str[i] & 0xff); + buf[8 + 2 * i + 1] = ((u16str[i] >> 8) & 0xff); + } + + str = reinterpret_cast<const char *>(constbuf.data()); + length = buf.size(); + + break; + /* \todo Support JIS */ + case JIS: + case ASCII: + case Undefined: + buf = StringEncodingCodes.find(enc)->second; + [[fallthrough]]; + default: + buf.insert(buf.end(), item.begin(), item.end()); + str = reinterpret_cast<const char *>(constbuf.data()); + + /* + * Strings stored in different formats (EXIF_FORMAT_UNDEFINED) + * are not null-terminated. + */ + length = buf.size(); + break; + } } ExifEntry *entry = createEntry(ifd, tag, format, length, length); @@ -210,12 +251,12 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str void Exif::setMake(const std::string &make) { - setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, make); + setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, NoEncoding, make); } void Exif::setModel(const std::string &model) { - setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, model); + setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, NoEncoding, model); } void Exif::setSize(const Size &size) @@ -233,9 +274,9 @@ void Exif::setTimestamp(time_t timestamp) strftime(str, sizeof(str), "%Y:%m:%d %H:%M:%S", &tm); std::string ts(str); - setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, ts); - setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, ts); - setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, ts); + setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, NoEncoding, ts); + setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, NoEncoding, ts); + setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, NoEncoding, ts); /* Query and set timezone information if available. */ int r = strftime(str, sizeof(str), "%z", &tm); @@ -244,13 +285,13 @@ void Exif::setTimestamp(time_t timestamp) tz.insert(3, 1, ':'); setString(EXIF_IFD_EXIF, static_cast<ExifTag>(_ExifTag::OFFSET_TIME), - EXIF_FORMAT_ASCII, tz); + EXIF_FORMAT_ASCII, NoEncoding, tz); setString(EXIF_IFD_EXIF, static_cast<ExifTag>(_ExifTag::OFFSET_TIME_ORIGINAL), - EXIF_FORMAT_ASCII, tz); + EXIF_FORMAT_ASCII, NoEncoding, tz); setString(EXIF_IFD_EXIF, static_cast<ExifTag>(_ExifTag::OFFSET_TIME_DIGITIZED), - EXIF_FORMAT_ASCII, tz); + EXIF_FORMAT_ASCII, NoEncoding, tz); } } @@ -290,6 +331,34 @@ void Exif::setThumbnail(Span<const unsigned char> thumbnail, setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression); } +/** + * \brief Convert UTF-8 string to UTF-16 string + * \param[in] str String to convert + * + * \return \a str in UTF-16 + */ +std::u16string Exif::utf8ToUtf16(const std::string &str) +{ + std::mbstate_t state{}; + char16_t c16; + const char *ptr = str.data(); + const char *end = ptr + str.size(); + + std::u16string ret; + while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) { + if (rc == static_cast<size_t>(-2) || + rc == static_cast<size_t>(-1)) + break; + + ret.push_back(c16); + + if (rc > 0) + ptr += rc; + } + + return ret; +} + [[nodiscard]] int Exif::generate() { if (exifData_) { diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h index 5cab4559..db98ba63 100644 --- a/src/android/jpeg/exif.h +++ b/src/android/jpeg/exif.h @@ -26,6 +26,14 @@ public: JPEG = 6, }; + enum StringEncoding { + NoEncoding = 0, + ASCII = 1, + JIS = 2, + Unicode = 3, + Undefined = 4, + }; + void setMake(const std::string &make); void setModel(const std::string &model); @@ -46,9 +54,11 @@ private: void setShort(ExifIfd ifd, ExifTag tag, uint16_t item); void setLong(ExifIfd ifd, ExifTag tag, uint32_t item); void setString(ExifIfd ifd, ExifTag tag, ExifFormat format, - const std::string &item); + StringEncoding enc, const std::string &item); void setRational(ExifIfd ifd, ExifTag tag, ExifRational item); + std::u16string utf8ToUtf16(const std::string &str); + bool valid_; ExifData *data_;
GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16. Expand setString to take an encoding when the field type is undefined. Update callers accordingly. Signed-off-by: Paul Elder <paul.elder@ideasonboard.com> --- Changes in v2: - moved from utils into exif - support no-encoding --- src/android/jpeg/exif.cpp | 107 +++++++++++++++++++++++++++++++------- src/android/jpeg/exif.h | 12 ++++- 2 files changed, 99 insertions(+), 20 deletions(-)