diff --git a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcConstants.java b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcConstants.java index 4768efae3..3311f0117 100644 --- a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcConstants.java +++ b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcConstants.java @@ -88,7 +88,8 @@ public final class IptcConstants { public static final int IPTC_RECORD_TAG_MARKER = 0x1c; public static final int IPTC_ENVELOPE_RECORD_NUMBER = 0x01; public static final int IPTC_APPLICATION_2_RECORD_NUMBER = 0x02; - + public static final int IPTC_ENV_TAG_CODED_CHARACTER_SET = 0x5A; + private IptcConstants() { } } diff --git a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java index 17698a067..2067c13cb 100644 --- a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java +++ b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.ByteOrder; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -42,6 +44,8 @@ public class IptcParser extends BinaryFileParser { private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN; + private static final String DEFAULT_ENCODING = "ISO-8859-1"; + private static final String UTF_8 = "utf8"; public IptcParser() { setByteOrder(ByteOrder.BIG_ENDIAN); @@ -125,6 +129,7 @@ public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, protected List parseIPTCBlock(final byte[] bytes, final boolean verbose) throws IOException { final List elements = new ArrayList(); + String encoding = DEFAULT_ENCODING; int index = 0; // Integer recordVersion = null; @@ -190,6 +195,11 @@ protected List 
parseIPTCBlock(final byte[] bytes, final boolean verb // Debug.debug("recordSize", recordSize + " (0x" // + Integer.toHexString(recordSize) + ")"); + if(recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && recordType == IptcConstants.IPTC_ENV_TAG_CODED_CHARACTER_SET){ + encoding = getEncodingCharsetName(recordData); + continue; + } + if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) { continue; } @@ -226,7 +236,7 @@ protected List parseIPTCBlock(final byte[] bytes, final boolean verb // continue; // } - final String value = new String(recordData, "ISO-8859-1"); + final String value = new String(recordData, encoding); final IptcType iptcType = IptcTypeLookup.getIptcType(recordType); @@ -248,6 +258,35 @@ protected List parseIPTCBlock(final byte[] bytes, final boolean verb return elements; } + private String getEncodingCharsetName(byte[] codedCharacterSet){ + final Character WHITESPACE = ' '; + String codedCharacterSetString = new String(codedCharacterSet); + try { + if (Charset.isSupported(codedCharacterSetString)) { + return codedCharacterSetString; + } + }catch (IllegalCharsetNameException e){ + + }catch (IllegalArgumentException e){ + + } + //check if encoding is utf8 escape sequence + byte[] utf8EscSeq = new byte[]{'\u001B','%','G'}; + int j=0; + boolean match = true; + for(byte character : codedCharacterSet){ + if(!WHITESPACE.equals(character) && utf8EscSeq[j++] != character) { + match = false; + } + } + + if(match){ + return UTF_8; + } + + return DEFAULT_ENCODING; + } + protected List parseAllBlocks(final byte[] bytes, final boolean verbose, final boolean strict) throws ImageReadException, IOException { final List blocks = new ArrayList(); @@ -438,8 +477,8 @@ public int compare(final IptcRecord e1, final IptcRecord e2) { } bos.write(element.iptcType.getType()); - final byte[] recordData = element.value.getBytes("ISO-8859-1"); - if (!new String(recordData, "ISO-8859-1").equals(element.value)) { + final byte[] recordData = 
element.value.getBytes(DEFAULT_ENCODING); + if (!new String(recordData, DEFAULT_ENCODING).equals(element.value)) { throw new ImageWriteException( "Invalid record value, not ISO-8859-1"); } @@ -456,5 +495,4 @@ public int compare(final IptcRecord e1, final IptcRecord e2) { return blockData; } - } diff --git a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java index 97b0011fb..efb8371ea 100644 --- a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java +++ b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcRecord.java @@ -38,11 +38,11 @@ public IptcRecord(final IptcType iptcType, final byte[] bytes, final String valu this.value = value; } - public IptcRecord(final IptcType iptcType, final String value) { + public IptcRecord(final IptcType iptcType, final String value, final String charsetName) { this.iptcType = iptcType; byte[] tempBytes; try { - tempBytes = value.getBytes("ISO-8859-1"); + tempBytes = value.getBytes(charsetName); } catch (final UnsupportedEncodingException cannotHappen) { tempBytes = null; } @@ -50,6 +50,10 @@ public IptcRecord(final IptcType iptcType, final String value) { this.value = value; } + public IptcRecord(final IptcType iptcType, final String value) { + this(iptcType, value, "ISO-8859-1"); + } + public byte[] getRawBytes() { return bytes.clone(); } diff --git a/src/test/data/images/iptc/2/test.jpeg b/src/test/data/images/iptc/2/test.jpeg new file mode 100644 index 000000000..4407f6612 Binary files /dev/null and b/src/test/data/images/iptc/2/test.jpeg differ diff --git a/src/test/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcCodedCharacterSetTest.java b/src/test/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcCodedCharacterSetTest.java new file mode 100644 index 000000000..5186d4c78 --- /dev/null +++ b/src/test/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcCodedCharacterSetTest.java @@ 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.imaging.formats.jpeg.iptc;

import org.apache.commons.imaging.ImagingTestConstants;
import org.apache.commons.imaging.common.ImageMetadata;
import org.apache.commons.imaging.common.bytesource.ByteSource;
import org.apache.commons.imaging.common.bytesource.ByteSourceFile;
import org.apache.commons.imaging.formats.jpeg.JpegImageParser;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.io.File;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.Collections;

import static org.junit.Assert.fail;


/**
 * Checks that an IPTC caption stored as UTF-8 is decoded as UTF-8 when the image is read
 * through {@link JpegImageParser}.
 */
@RunWith(Parameterized.class)
public class IptcCodedCharacterSetTest extends IptcBaseTest {

    /** Metadata key whose value is checked. */
    private static final String CAPTION_KEY = "Caption/Abstract";

    /** UTF-8 bytes of the caption the test image is expected to carry. */
    private static final byte[] EXPECTED_CAPTION_UTF8 =
            {-28, -68, -102, -26, -124, -113, -27, -83, -105};

    private final File imageFile;

    /**
     * Supplies the image files the test is run against.
     *
     * @return a single-element collection holding the test image
     */
    @Parameterized.Parameters
    public static Collection data() throws Exception {
        return Collections.singleton(new File(ImagingTestConstants.TEST_IMAGE_FOLDER, "iptc/2/test.jpeg"));
    }

    public IptcCodedCharacterSetTest(final File imageFile) {
        this.imageFile = imageFile;
    }

    /**
     * Reads the image metadata and requires that a caption item exists and equals the expected
     * text. A missing caption is a failure, so the test cannot pass without comparing anything.
     */
    @Test
    public void testCodedCharacterSet() throws Exception {
        final String requiredCaption = new String(EXPECTED_CAPTION_UTF8, Charset.forName("UTF-8"));

        final ByteSource byteSource = new ByteSourceFile(imageFile);
        final JpegImageParser jpegImageParser = new JpegImageParser();
        final ImageMetadata metadata = jpegImageParser.getMetadata(byteSource, null);
        if (metadata == null) {
            fail("no metadata found in " + imageFile);
        }

        boolean captionFound = false;
        for (final ImageMetadata.ImageMetadataItem item : metadata.getItems()) {
            // Items are inspected through their string form, split once at the first ':' into
            // key and value.
            final String[] keyValue = item.toString().split(":", 2);
            if (keyValue.length > 1 && keyValue[0].equalsIgnoreCase(CAPTION_KEY)) {
                captionFound = true;
                if (!keyValue[1].trim().equals(requiredCaption)) {
                    fail("metadata extraction failed");
                }
            }
        }

        if (!captionFound) {
            fail("no " + CAPTION_KEY + " item found in " + imageFile);
        }
    }
}