[libcamera-devel,v4,1/8] android: jpeg: exif: Expand setString to support different encodings
diff mbox series

Message ID 20210125071444.26252-2-paul.elder@ideasonboard.com
State Superseded
Delegated to: Paul Elder
Headers show
Series
  • Fill in android result metadata and EXIF tags
Related show

Commit Message

Paul Elder Jan. 25, 2021, 7:14 a.m. UTC
GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16.
Expand setString to take an encoding when the field type is undefined.
Update callers accordingly.

Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

---
Changes in v4:
- clean up switch

Changes in v3:
- use array to contain string encoding codes
- drop JIS and Undefined
- clean up setString a bit
- use the endianness of the exif

Changes in v2:
- moved from utils into exif
- support no-encoding
---
 src/android/jpeg/exif.cpp | 77 +++++++++++++++++++++++++++++++++++++--
 src/android/jpeg/exif.h   | 11 +++++-
 2 files changed, 84 insertions(+), 4 deletions(-)

Comments

Jacopo Mondi Jan. 25, 2021, 10:51 a.m. UTC | #1
Hi Paul,

On Mon, Jan 25, 2021 at 04:14:37PM +0900, Paul Elder wrote:
> GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16.
> Expand setString to take an encoding when the field type is undefined.
> Update callers accordingly.
>
> Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>
> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
>
> ---
> Changes in v4:
> - clean up switch
>
> Changes in v3:
> - use array to contain string encoding codes
> - drop JIS and Undefined
> - clean up setString a bit
> - use the endianness of the exif
>
> Changes in v2:
> - moved from utils into exif
> - support no-encoding
> ---
>  src/android/jpeg/exif.cpp | 77 +++++++++++++++++++++++++++++++++++++--
>  src/android/jpeg/exif.h   | 11 +++++-
>  2 files changed, 84 insertions(+), 4 deletions(-)
>
> diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp
> index 33b3fa7f..89343323 100644
> --- a/src/android/jpeg/exif.cpp
> +++ b/src/android/jpeg/exif.cpp
> @@ -7,6 +7,9 @@
>
>  #include "exif.h"
>
> +#include <map>
> +#include <uchar.h>
> +
>  #include "libcamera/internal/log.h"
>  #include "libcamera/internal/utils.h"
>
> @@ -178,11 +181,18 @@ void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item)
>  	exif_entry_unref(entry);
>  }
>
> -void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item)
> +static const std::map<Exif::StringEncoding, std::array<uint8_t, 8>> stringEncodingCodes = {
> +	{ Exif::ASCII,     { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } },
> +	{ Exif::Unicode,   { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } },
> +};
> +
> +void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> +		     const std::string &item, StringEncoding encoding)
>  {
>  	std::string ascii;
>  	size_t length;
>  	const char *str;
> +	std::vector<uint8_t> buf;
>
>  	if (format == EXIF_FORMAT_ASCII) {
>  		ascii = utils::toAscii(item);
> @@ -191,13 +201,46 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
>  		/* Pad 1 extra byte to null-terminate the ASCII string. */
>  		length = ascii.length() + 1;
>  	} else {
> -		str = item.c_str();
> +		std::u16string u16str;
> +
> +		auto encodingString = stringEncodingCodes.find(encoding);
> +		if (encodingString != stringEncodingCodes.end()) {
> +			buf = {
> +				encodingString->second.begin(),
> +				encodingString->second.end()
> +			};
> +		}
> +
> +		switch (encoding) {
> +		case Unicode:

I would have called Exif::UTF-16 and Exif::UTF-8 to avoid repeating a
check for ASCII.

        if (format == ASCII) {

        } else {
                switch (encoding) {
                case Unicode:
                        break;
                case Ascii:
                default:
                        break
                }
        }

Very minor though

> +			u16str = utf8ToUtf16(item);
> +
> +			buf.resize(8 + u16str.size() * 2);
> +			for (size_t i = 0; i < u16str.size(); i++) {
> +				if (order_ == EXIF_BYTE_ORDER_INTEL) {
> +					buf[8 + 2 * i] = u16str[i] & 0xff;
> +					buf[8 + 2 * i + 1] = (u16str[i] >> 8) & 0xff;
> +				} else {
> +					buf[8 + 2 * i] = (u16str[i] >> 8) & 0xff;
> +					buf[8 + 2 * i + 1] = u16str[i] & 0xff;
> +				}
> +			}
> +
> +			break;
> +
> +		case ASCII:
> +		case NoEncoding:
> +			buf.insert(buf.end(), item.begin(), item.end());
> +			break;
> +		}
> +
> +		str = reinterpret_cast<const char *>(buf.data());
>
>  		/*
>  		 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
>  		 * are not null-terminated.
>  		 */

Is this still true for Unicode and Ascii encoded strings ? Just
checking...

> -		length = item.length();
> +		length = buf.size();
>  	}
>
>  	ExifEntry *entry = createEntry(ifd, tag, format, length, length);
> @@ -290,6 +333,34 @@ void Exif::setThumbnail(Span<const unsigned char> thumbnail,
>  	setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression);
>  }
>
> +/**
> + * \brief Convert UTF-8 string to UTF-16 string
> + * \param[in] str String to convert
> + *
> + * \return \a str in UTF-16
> + */
> +std::u16string Exif::utf8ToUtf16(const std::string &str)
> +{
> +	mbstate_t state{};
> +	char16_t c16;
> +	const char *ptr = str.data();
> +	const char *end = ptr + str.size();
> +
> +	std::u16string ret;
> +	while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) {
> +		if (rc == static_cast<size_t>(-2) ||
> +		    rc == static_cast<size_t>(-1))
> +			break;
> +
> +		ret.push_back(c16);
> +
> +		if (rc > 0)
> +			ptr += rc;
> +	}

I thought endianess was better handled here, but it has probably been
changed upon a review comment, so no problem...

The rest looks good!
Reviewed-by: Jacopo Mondi <jacopo@jmondi.org>

Thanks
  j


> +
> +	return ret;
> +}
> +
>  [[nodiscard]] int Exif::generate()
>  {
>  	if (exifData_) {
> diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h
> index 5cab4559..8b84165b 100644
> --- a/src/android/jpeg/exif.h
> +++ b/src/android/jpeg/exif.h
> @@ -26,6 +26,12 @@ public:
>  		JPEG = 6,
>  	};
>
> +	enum StringEncoding {
> +		NoEncoding = 0,
> +		ASCII = 1,
> +		Unicode = 2,
> +	};
> +
>  	void setMake(const std::string &make);
>  	void setModel(const std::string &model);
>
> @@ -46,9 +52,12 @@ private:
>  	void setShort(ExifIfd ifd, ExifTag tag, uint16_t item);
>  	void setLong(ExifIfd ifd, ExifTag tag, uint32_t item);
>  	void setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> -		       const std::string &item);
> +		       const std::string &item,
> +		       StringEncoding encoding = NoEncoding);
>  	void setRational(ExifIfd ifd, ExifTag tag, ExifRational item);
>
> +	std::u16string utf8ToUtf16(const std::string &str);
> +
>  	bool valid_;
>
>  	ExifData *data_;
> --
> 2.27.0
>
> _______________________________________________
> libcamera-devel mailing list
> libcamera-devel@lists.libcamera.org
> https://lists.libcamera.org/listinfo/libcamera-devel
Laurent Pinchart Jan. 25, 2021, 11:06 a.m. UTC | #2
Hi Jacopo,

On Mon, Jan 25, 2021 at 11:51:56AM +0100, Jacopo Mondi wrote:
> On Mon, Jan 25, 2021 at 04:14:37PM +0900, Paul Elder wrote:
> > GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16.
> > Expand setString to take an encoding when the field type is undefined.
> > Update callers accordingly.
> >
> > Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>
> > Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> >
> > ---
> > Changes in v4:
> > - clean up switch
> >
> > Changes in v3:
> > - use array to contain string encoding codes
> > - drop JIS and Undefined
> > - clean up setString a bit
> > - use the endianness of the exif
> >
> > Changes in v2:
> > - moved from utils into exif
> > - support no-encoding
> > ---
> >  src/android/jpeg/exif.cpp | 77 +++++++++++++++++++++++++++++++++++++--
> >  src/android/jpeg/exif.h   | 11 +++++-
> >  2 files changed, 84 insertions(+), 4 deletions(-)
> >
> > diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp
> > index 33b3fa7f..89343323 100644
> > --- a/src/android/jpeg/exif.cpp
> > +++ b/src/android/jpeg/exif.cpp
> > @@ -7,6 +7,9 @@
> >
> >  #include "exif.h"
> >
> > +#include <map>
> > +#include <uchar.h>
> > +
> >  #include "libcamera/internal/log.h"
> >  #include "libcamera/internal/utils.h"
> >
> > @@ -178,11 +181,18 @@ void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item)
> >  	exif_entry_unref(entry);
> >  }
> >
> > -void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item)
> > +static const std::map<Exif::StringEncoding, std::array<uint8_t, 8>> stringEncodingCodes = {
> > +	{ Exif::ASCII,     { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } },
> > +	{ Exif::Unicode,   { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } },
> > +};
> > +
> > +void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> > +		     const std::string &item, StringEncoding encoding)
> >  {
> >  	std::string ascii;
> >  	size_t length;
> >  	const char *str;
> > +	std::vector<uint8_t> buf;
> >
> >  	if (format == EXIF_FORMAT_ASCII) {
> >  		ascii = utils::toAscii(item);
> > @@ -191,13 +201,46 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
> >  		/* Pad 1 extra byte to null-terminate the ASCII string. */
> >  		length = ascii.length() + 1;
> >  	} else {
> > -		str = item.c_str();
> > +		std::u16string u16str;
> > +
> > +		auto encodingString = stringEncodingCodes.find(encoding);
> > +		if (encodingString != stringEncodingCodes.end()) {
> > +			buf = {
> > +				encodingString->second.begin(),
> > +				encodingString->second.end()
> > +			};
> > +		}
> > +
> > +		switch (encoding) {
> > +		case Unicode:
> 
> I would have called Exif::UTF-16 and Exif::UTF-8 to avoid repeating a
> check for ASCII.
> 
>         if (format == ASCII) {
> 
>         } else {
>                 switch (encoding) {
>                 case Unicode:
>                         break;
>                 case Ascii:
>                 default:
>                         break
>                 }
>         }
> 
> Very minor though

Exif is amazing, it has a field format that can be ASCII or UNDEFINED
("undefined" is an amazing format...). Tags in ASCII format 0-terminated
and need to be encoded in ASCII, tags in UNDEFINED format are not
0-terminated and can be either "not encoded" (which means there's no
encoding information, as far as I remember this is only used for the
Exif version string, which is effectively ASCII), or encoded in
"UNICODE", "JIS", "ASCII" or "UNDEFINED" (all these are "defined" as
part of the Exif spec). The Ascii case below is the ASCII encoding
applied to the UNDEFINED format, it's not UTF-8.

> > +			u16str = utf8ToUtf16(item);
> > +
> > +			buf.resize(8 + u16str.size() * 2);
> > +			for (size_t i = 0; i < u16str.size(); i++) {
> > +				if (order_ == EXIF_BYTE_ORDER_INTEL) {
> > +					buf[8 + 2 * i] = u16str[i] & 0xff;
> > +					buf[8 + 2 * i + 1] = (u16str[i] >> 8) & 0xff;
> > +				} else {
> > +					buf[8 + 2 * i] = (u16str[i] >> 8) & 0xff;
> > +					buf[8 + 2 * i + 1] = u16str[i] & 0xff;
> > +				}
> > +			}
> > +
> > +			break;
> > +
> > +		case ASCII:
> > +		case NoEncoding:
> > +			buf.insert(buf.end(), item.begin(), item.end());
> > +			break;
> > +		}
> > +
> > +		str = reinterpret_cast<const char *>(buf.data());
> >
> >  		/*
> >  		 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
> >  		 * are not null-terminated.
> >  		 */
> 
> Is this still true for Unicode and Ascii encoded strings ? Just
> checking...
> 
> > -		length = item.length();
> > +		length = buf.size();
> >  	}
> >
> >  	ExifEntry *entry = createEntry(ifd, tag, format, length, length);
> > @@ -290,6 +333,34 @@ void Exif::setThumbnail(Span<const unsigned char> thumbnail,
> >  	setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression);
> >  }
> >
> > +/**
> > + * \brief Convert UTF-8 string to UTF-16 string
> > + * \param[in] str String to convert
> > + *
> > + * \return \a str in UTF-16
> > + */
> > +std::u16string Exif::utf8ToUtf16(const std::string &str)
> > +{
> > +	mbstate_t state{};
> > +	char16_t c16;
> > +	const char *ptr = str.data();
> > +	const char *end = ptr + str.size();
> > +
> > +	std::u16string ret;
> > +	while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) {
> > +		if (rc == static_cast<size_t>(-2) ||
> > +		    rc == static_cast<size_t>(-1))
> > +			break;
> > +
> > +		ret.push_back(c16);
> > +
> > +		if (rc > 0)
> > +			ptr += rc;
> > +	}
> 
> I thought endianess was better handled here, but it has probably been
> changed upon a review comment, so no problem...

This function returns a std::u16string, not an array of bytes, so
endianness can't be handled here.

> The rest looks good!
> Reviewed-by: Jacopo Mondi <jacopo@jmondi.org>
> 
> > +
> > +	return ret;
> > +}
> > +
> >  [[nodiscard]] int Exif::generate()
> >  {
> >  	if (exifData_) {
> > diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h
> > index 5cab4559..8b84165b 100644
> > --- a/src/android/jpeg/exif.h
> > +++ b/src/android/jpeg/exif.h
> > @@ -26,6 +26,12 @@ public:
> >  		JPEG = 6,
> >  	};
> >
> > +	enum StringEncoding {
> > +		NoEncoding = 0,
> > +		ASCII = 1,
> > +		Unicode = 2,
> > +	};
> > +
> >  	void setMake(const std::string &make);
> >  	void setModel(const std::string &model);
> >
> > @@ -46,9 +52,12 @@ private:
> >  	void setShort(ExifIfd ifd, ExifTag tag, uint16_t item);
> >  	void setLong(ExifIfd ifd, ExifTag tag, uint32_t item);
> >  	void setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> > -		       const std::string &item);
> > +		       const std::string &item,
> > +		       StringEncoding encoding = NoEncoding);
> >  	void setRational(ExifIfd ifd, ExifTag tag, ExifRational item);
> >
> > +	std::u16string utf8ToUtf16(const std::string &str);
> > +
> >  	bool valid_;
> >
> >  	ExifData *data_;

Patch
diff mbox series

diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp
index 33b3fa7f..89343323 100644
--- a/src/android/jpeg/exif.cpp
+++ b/src/android/jpeg/exif.cpp
@@ -7,6 +7,9 @@ 
 
 #include "exif.h"
 
+#include <map>
+#include <uchar.h>
+
 #include "libcamera/internal/log.h"
 #include "libcamera/internal/utils.h"
 
@@ -178,11 +181,18 @@  void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item)
 	exif_entry_unref(entry);
 }
 
-void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item)
+static const std::map<Exif::StringEncoding, std::array<uint8_t, 8>> stringEncodingCodes = {
+	{ Exif::ASCII,     { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } },
+	{ Exif::Unicode,   { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } },
+};
+
+void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
+		     const std::string &item, StringEncoding encoding)
 {
 	std::string ascii;
 	size_t length;
 	const char *str;
+	std::vector<uint8_t> buf;
 
 	if (format == EXIF_FORMAT_ASCII) {
 		ascii = utils::toAscii(item);
@@ -191,13 +201,46 @@  void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
 		/* Pad 1 extra byte to null-terminate the ASCII string. */
 		length = ascii.length() + 1;
 	} else {
-		str = item.c_str();
+		std::u16string u16str;
+
+		auto encodingString = stringEncodingCodes.find(encoding);
+		if (encodingString != stringEncodingCodes.end()) {
+			buf = {
+				encodingString->second.begin(),
+				encodingString->second.end()
+			};
+		}
+
+		switch (encoding) {
+		case Unicode:
+			u16str = utf8ToUtf16(item);
+
+			buf.resize(8 + u16str.size() * 2);
+			for (size_t i = 0; i < u16str.size(); i++) {
+				if (order_ == EXIF_BYTE_ORDER_INTEL) {
+					buf[8 + 2 * i] = u16str[i] & 0xff;
+					buf[8 + 2 * i + 1] = (u16str[i] >> 8) & 0xff;
+				} else {
+					buf[8 + 2 * i] = (u16str[i] >> 8) & 0xff;
+					buf[8 + 2 * i + 1] = u16str[i] & 0xff;
+				}
+			}
+
+			break;
+
+		case ASCII:
+		case NoEncoding:
+			buf.insert(buf.end(), item.begin(), item.end());
+			break;
+		}
+
+		str = reinterpret_cast<const char *>(buf.data());
 
 		/*
 		 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
 		 * are not null-terminated.
 		 */
-		length = item.length();
+		length = buf.size();
 	}
 
 	ExifEntry *entry = createEntry(ifd, tag, format, length, length);
@@ -290,6 +333,34 @@  void Exif::setThumbnail(Span<const unsigned char> thumbnail,
 	setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression);
 }
 
+/**
+ * \brief Convert UTF-8 string to UTF-16 string
+ * \param[in] str String to convert
+ *
+ * \return \a str in UTF-16
+ */
+std::u16string Exif::utf8ToUtf16(const std::string &str)
+{
+	mbstate_t state{};
+	char16_t c16;
+	const char *ptr = str.data();
+	const char *end = ptr + str.size();
+
+	std::u16string ret;
+	while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) {
+		if (rc == static_cast<size_t>(-2) ||
+		    rc == static_cast<size_t>(-1))
+			break;
+
+		ret.push_back(c16);
+
+		if (rc > 0)
+			ptr += rc;
+	}
+
+	return ret;
+}
+
 [[nodiscard]] int Exif::generate()
 {
 	if (exifData_) {
diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h
index 5cab4559..8b84165b 100644
--- a/src/android/jpeg/exif.h
+++ b/src/android/jpeg/exif.h
@@ -26,6 +26,12 @@  public:
 		JPEG = 6,
 	};
 
+	enum StringEncoding {
+		NoEncoding = 0,
+		ASCII = 1,
+		Unicode = 2,
+	};
+
 	void setMake(const std::string &make);
 	void setModel(const std::string &model);
 
@@ -46,9 +52,12 @@  private:
 	void setShort(ExifIfd ifd, ExifTag tag, uint16_t item);
 	void setLong(ExifIfd ifd, ExifTag tag, uint32_t item);
 	void setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
-		       const std::string &item);
+		       const std::string &item,
+		       StringEncoding encoding = NoEncoding);
 	void setRational(ExifIfd ifd, ExifTag tag, ExifRational item);
 
+	std::u16string utf8ToUtf16(const std::string &str);
+
 	bool valid_;
 
 	ExifData *data_;