Thursday, March 12, 2015

UTFDataFormatException

java.io.UTFDataFormatException

UTFDataFormatException is described in the javadoc comments as:

Signals that a malformed string in modified UTF-8 format has been read in a data input stream or by any class that implements the data input interface. See the DataInput class description for the format in which modified UTF-8 strings are read and written.
Author: Frank Yellin. Version: 1.13, 05/13/04. See also: java.io.DataInput; java.io.DataInputStream#readUTF(java.io.DataInput); java.io.IOException. Since: JDK1.0.

Where is this exception thrown?

The following is a list of exception messages cross-referenced to the source code responsible for throwing them. Click on the method link to view the code and see how the exception is thrown.

How is this exception thrown?

The following sub-sections identify where this exception is thrown, and how (or why) the code is throwing the exception.

Any source code quoted in this section is subject to the Java Research License unless stated otherwise.

java.io.DataOutputStream.writeUTF(String, DataOutput)

/**
     * Writes a string to the specified DataOutput using
     * <a href='DataInput.html#modified-utf-8'>modified UTF-8</a>
     * encoding in a machine-independent manner. 
     * First, two bytes are written to out as if by the <code>writeShort</code>
     * method giving the number of bytes to follow. This value is the number of
     * bytes actually written out, not the length of the string. Following the
     * length, each character of the string is output, in sequence, using the
     * modified UTF-8 encoding for the character. If no exception is thrown, the
     * counter <code>written</code> is incremented by the total number of 
     * bytes written to the output stream. This will be at least two 
     * plus the length of <code>str</code>, and at most two plus 
     * thrice the length of <code>str</code>.
     * @param      str   a string to be written.
     * @param      out   destination to write to
     * @return     The number of bytes written out.
     * @exception  IOException  if an I/O error occurs.
     */
static int writeUTF(String str, DataOutput out) throws IOException {
    int strlen = str.length();
    int utflen = 0;
    int c, count = 0;
    for (int i = 0; i < strlen; i++) {
        c = str.charAt(i);
        if ((c >= 0x0001) && (c <= 0x007F)) {
            utflen++;
        } else if (c > 0x07FF) {
            utflen += 3;
        } else {
            utflen += 2;
        }
    }
    if (utflen > 65535) throw new UTFDataFormatException('encoded string too long: ' + utflen + ' bytes');
    byte[] bytearr = null;
    if (out instanceof DataOutputStream) {
        DataOutputStream dos = (DataOutputStream) out;
        if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2))) dos.bytearr = new byte[(utflen * 2) + 2];
        bytearr = dos.bytearr;
    } else {
        bytearr = new byte[utflen + 2];
    }
    bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
    bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
    int i = 0;
    for (i = 0; i < strlen; i++) {
        c = str.charAt(i);
        if (!((c >= 0x0001) && (c <= 0x007F))) break;
        bytearr[count++] = (byte) c;
    }
    for (; i < strlen; i++) {
        c = str.charAt(i);
        if ((c >= 0x0001) && (c <= 0x007F)) {
            bytearr[count++] = (byte) c;
        } else if (c > 0x07FF) {
            bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
            bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
            bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
        } else {
            bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
            bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
        }
    }
    out.write(bytearr, 0, utflen + 2);
    return utflen + 2;
}

Source: "Java SE Downloads: Java SE 6 JDK Source Code", at: http://www.oracle.com/technetwork/java/javase/downloads/index.html

java.io.DataInputStream.readUTF(DataInput)

/**
     * Reads from the
     * stream <code>in</code> a representation
     * of a Unicode  character string encoded in
     * <a href='DataInput.html#modified-utf-8'>modified UTF-8</a> format;
     * this string of characters is then returned as a <code>String</code>.
     * The details of the modified UTF-8 representation
     * are  exactly the same as for the <code>readUTF</code>
     * method of <code>DataInput</code>.
     * @param      in   a data input stream.
     * @return     a Unicode string.
     * @exception  EOFException            if the input stream reaches the end
     *               before all the bytes.
     * @exception  IOException             if an I/O error occurs.
     * @exception  UTFDataFormatException  if the bytes do not represent a
     *               valid modified UTF-8 encoding of a Unicode string.
     * @see        java.io.DataInputStream#readUnsignedShort()
     */
public static final String readUTF(DataInput in) throws IOException {
    int utflen = in.readUnsignedShort();
    byte[] bytearr = null;
    char[] chararr = null;
    if (in instanceof DataInputStream) {
        DataInputStream dis = (DataInputStream) in;
        if (dis.bytearr.length < utflen) {
            dis.bytearr = new byte[utflen * 2];
            dis.chararr = new char[utflen * 2];
        }
        chararr = dis.chararr;
        bytearr = dis.bytearr;
    } else {
        bytearr = new byte[utflen];
        chararr = new char[utflen];
    }
    int c, char2, char3;
    int count = 0;
    int chararr_count = 0;
    in.readFully(bytearr, 0, utflen);
    while (count < utflen) {
        c = (int) bytearr[count] & 0xff;
        if (c > 127) break;
        count++;
        chararr[chararr_count++] = (char) c;
    }
    while (count < utflen) {
        c = (int) bytearr[count] & 0xff;
        switch(c >> 4) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                count++;
                chararr[chararr_count++] = (char) c;
                break;
            case 12:
            case 13:
                count += 2;
                if (count > utflen) throw new UTFDataFormatException('malformed input: partial character at end');
                char2 = (int) bytearr[count - 1];
                if ((char2 & 0xC0) != 0x80) throw new UTFDataFormatException('malformed input around byte ' + count);
                chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
                break;
            case 14:
                count += 3;
                if (count > utflen) throw new UTFDataFormatException('malformed input: partial character at end');
                char2 = (int) bytearr[count - 2];
                char3 = (int) bytearr[count - 1];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) throw new UTFDataFormatException('malformed input around byte ' + (count - 1));
                chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0));
                break;
            default:
                throw new UTFDataFormatException('malformed input around byte ' + count);
        }
    }
    return new String(chararr, 0, chararr_count);
}

Source: "Java SE Downloads: Java SE 6 JDK Source Code", at: http://www.oracle.com/technetwork/java/javase/downloads/index.html

javax.imageio.stream.ImageOutputStreamImpl.writeUTF(String)

public void writeUTF(String s) throws IOException {
    int strlen = s.length();
    int utflen = 0;
    char[] charr = new char[strlen];
    int c, boff = 0;
    s.getChars(0, strlen, charr, 0);
    for (int i = 0; i < strlen; i++) {
        c = charr[i];
        if ((c >= 0x0001) && (c <= 0x007F)) {
            utflen++;
        } else if (c > 0x07FF) {
            utflen += 3;
        } else {
            utflen += 2;
        }
    }
    if (utflen > 65535) {
        throw new UTFDataFormatException('utflen > 65536!');
    }
    byte[] b = new byte[utflen + 2];
    b[boff++] = (byte) ((utflen >>> 8) & 0xFF);
    b[boff++] = (byte) ((utflen >>> 0) & 0xFF);
    for (int i = 0; i < strlen; i++) {
        c = charr[i];
        if ((c >= 0x0001) && (c <= 0x007F)) {
            b[boff++] = (byte) c;
        } else if (c > 0x07FF) {
            b[boff++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
            b[boff++] = (byte) (0x80 | ((c >> 6) & 0x3F));
            b[boff++] = (byte) (0x80 | ((c >> 0) & 0x3F));
        } else {
            b[boff++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
            b[boff++] = (byte) (0x80 | ((c >> 0) & 0x3F));
        }
    }
    write(b, 0, utflen + 2);
}

Source: "Java SE Downloads: Java SE 6 JDK Source Code", at: http://www.oracle.com/technetwork/java/javase/downloads/index.html

No comments:

Post a Comment