在使用DataInputStream的时候,经常会用到readUTF来读取一段字符串,但是readUTF是如何知道该字符串有多长的呢?
下面的例子是对DataInputStream的简单使用:
package s1;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Minimal demo of a {@link DataOutputStream} / {@link DataInputStream} round trip:
 * two (double, UTF string) pairs are written to {@code test.txt} and then read back
 * in the same order.
 */
public class X {

    /**
     * Writes the sample records to {@code test.txt}, reads them back and prints each
     * value on its own line. Any I/O failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // The writer sits on top of a BufferedOutputStream, so the bytes only reach
            // the file once the stream is flushed or closed. It must therefore be closed
            // BEFORE the file is opened for reading; otherwise the reader sees an empty
            // file and readDouble() fails with EOFException.
            try (DataOutputStream out = new DataOutputStream(
                    new BufferedOutputStream(new FileOutputStream("test.txt")))) {
                out.writeDouble(3.14159);
                out.writeUTF("This is PI");
                out.writeDouble(1.41413);
                out.writeUTF("Square root of 2");
            }

            try (DataInputStream in = new DataInputStream(
                    new BufferedInputStream(new FileInputStream("test.txt")))) {
                System.out.println(in.readDouble());
                // How does readUTF know how long the string is?
                System.out.println(in.readUTF());
                System.out.println(in.readDouble());
                System.out.println(in.readUTF());
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException.
            e.printStackTrace();
        }
    }
}
其实原因并不复杂:writeUTF并不是简单地只写入字符串内容,而是会先写入2个字节,用来表示字符串编码后的字节长度,然后才写入字符串内容。可以参考源码:
/**
 * Writes {@code str} to {@code out} as a length-prefixed byte sequence: first two
 * bytes holding the encoded length (high byte first), then the encoded chars.
 * Each char takes 1 byte when it is in 0x0001..0x007F, 3 bytes when it is above
 * 0x07FF, and 2 bytes otherwise (which includes the char 0x0000).
 *
 * @param str the string to write
 * @param out the destination
 * @return the number of bytes written, i.e. the encoded length plus 2
 * @throws UTFDataFormatException if the encoded length exceeds 65535 bytes
 * @throws IOException if the declared-throwing call {@code out.write} fails
 */
static int writeUTF(String str, DataOutput out) throws IOException {
int strlen = str.length();
int utflen = 0;
int c, count = 0;
/* use charAt instead of copying String to char array */
// First pass: compute the encoded length in bytes without producing any output.
for (int i = 0; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
utflen++;
} else if (c > 0x07FF) {
utflen += 3;
} else {
utflen += 2;
}
}
// The length prefix is only two bytes wide, so longer encodings cannot be represented.
if (utflen > 65535)
throw new UTFDataFormatException(
"encoded string too long: " + utflen + " bytes");
byte[] bytearr = null;
if (out instanceof DataOutputStream) {
// Reuse the stream's own scratch buffer, replacing it with a larger one
// when it is missing or too small for this string plus the prefix.
DataOutputStream dos = (DataOutputStream)out;
if(dos.bytearr == null || (dos.bytearr.length < (utflen+2)))
dos.bytearr = new byte[(utflen*2) + 2];
bytearr = dos.bytearr;
} else {
bytearr = new byte[utflen+2];
}
// Store the encoded byte length as the 2-byte prefix (high byte, then low byte).
bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
int i=0;
// Copy the leading run of single-byte chars; stop at the first char outside 0x0001..0x007F.
for (i=0; i<strlen; i++) {
c = str.charAt(i);
if (!((c >= 0x0001) && (c <= 0x007F))) break;
bytearr[count++] = (byte) c;
}
// Encode the rest of the string, handling 1-, 2- and 3-byte chars.
for (;i < strlen; i++){
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
bytearr[count++] = (byte) c;
} else if (c > 0x07FF) {
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
} else {
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
}
}
// Emit the prefix and the encoded chars to the destination in a single write.
out.write(bytearr, 0, utflen+2);
// The byte count returned is the encoded length plus the 2 bytes used by the length prefix.
return utflen + 2;
}
在readUTF的时候,先用readUnsignedShort读取2个字节的长度,然后再按该长度读取字符串内容。源码:
public final static String readUTF(DataInput in) throws IOException {
// Read the string's length prefix as an unsigned short.
int utflen = in.readUnsignedShort();
// Read the string content (the rest of the method is elided in this excerpt).
.
.
.
}