Avro提供了两种序列化的方式:
avro-specific:
和thrift一样可以通过idl方式生成代码,生成命令:
java -jar avro-tools-1.7.4.jar compile schema data.avsc src/
avro-generic:
支持schema的动态加载,不需要重新编译就可以处理新的数据源
avro的数据类型:
基本类型:
null: no value
boolean: a binary value
int: 32-bit signed integer
long: 64-bit signed integer
float: single precision (32-bit) IEEE 754 floating-point number
double: double precision (64-bit) IEEE 754 floating-point number
bytes: sequence of 8-bit unsigned bytes
string: unicode character sequence
复杂类型:
avro支持6种复杂类型:records, enums, arrays, maps, unions, fixed
下面是一个record类型的定义例子:
{
"type": "record", // 指定record类型,(required)
"name": "AvroData", // 生成的类名称,(required)
"namespace": "com.wuwen", // 命名空间,(optional)
"doc": "avro schema test.", // 用来描述该schema的,(optional)
"aliases": ["avroaliases"], // name的别名,(optional)
"fields": [ // 字段名,(required)
{"name": "GUID", "type": "string"}, // 定义一个字段名为GUID,基本数据类型为string的字段
{"name": "Content", "type": "string"},
{"name": "Time", "type": "long"}
]
}
例子:
package com.wuwen; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.file.FileReader; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.avro.util.Utf8; public class AvroDemo { public void serializeGeneric() throws IOException { Schema schema = Schema.parse(new File("E:/avro/AvroData.avsc")); GenericRecord datum = new GenericData.Record(schema); datum.put("GUID", new Utf8("1234567")); datum.put("Content", new Utf8("Avro测试")); datum.put("Time", 20130305134700L); // Serialize it. ByteArrayOutputStream out = new ByteArrayOutputStream(); DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema); Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); writer.write(datum, encoder); encoder.flush(); out.close(); // Deserialize it. 
DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null); GenericRecord result = reader.read(null, decoder); System.out.printf("GUID: %s, Content: %s, Time: %s\n", result.get("GUID"), result.get("Content"), result.get("Time")); } public void serializeSpecific() throws IOException { AvroData datum = new AvroData(); datum.setGUID(new Utf8("1234567")); datum.setContent(new Utf8("1234567")); datum.setTime(20130305134700L); File tmpFile = new File("E:/avro/myAvroExample.avro"); // Serialize it. DataFileWriter<AvroData> writer = new DataFileWriter<AvroData>(new SpecificDatumWriter<AvroData>(AvroData.class)); writer.create(AvroData.SCHEMA$, tmpFile); writer.append(datum); writer.close(); // Deserialize it. FileReader<AvroData> reader = DataFileReader.openReader(tmpFile, new SpecificDatumReader<AvroData>(AvroData.class)); while (reader.hasNext()) { AvroData result = reader.next(); System.out.printf("GUID: %s, Content: %s, Time: %s\n", result.getGUID(), result.getContent(), result.getTime()); } reader.close(); } public static void main(String[] args) throws IOException { AvroDemo example = new AvroDemo(); System.out.println("Generic"); example.serializeGeneric(); System.out.println("Specific"); example.serializeSpecific(); } }