DEX文件解析 - header解析
熟悉DEX文件格式有助于我们提高安全和逆向的技术水平,对于DEX文件格式一定要熟悉,因为它是一个基础知识,日后对学习加固、脱壳有很好的帮助。
1. DEX格式整体认知
首先,我们要对DEX的整体格式(宏观上)有一个认知,用如下图片进行说明:
图片中DEX分成了9大块,其中:
header:DEX文件头
string_ids:字符串索引列表
type_ids:类型索引列表
proto_ids:方法原型/声明索引列表
field_ids:字段索引列表
method_ids:方法索引列表
class_defs:类定义列表
data:数据段
link_data:链接数据段
推荐使用010Editor来学习DEX的文件结构。
2. header格式解析
2.1. header内部结构
接下来深入了解header的格式,以下的代码片段来自Android AOSP源代码。
注:number表示的是数量,就比如java语言中的数组:int[] a = new int[10],int数组中的10就是int数组的大小;size表示的是字节,注意看源代码英文的注释描述。
AOSP源码位置:art/libdexfile/dex/dex_file.h
// Raw header_item.
// In-memory mirror of the fixed-layout header of a dex file. Every member is
// default-initialised to zero. Apart from the magic, checksum and signature,
// each member is a 32-bit value holding either a size/count or a file offset
// for one section of the file.
struct Header {
// 8-byte magic; GetVersion() below decodes the version from it.
uint8_t magic_[8] = {
};
uint32_t checksum_ = 0; // See also location_checksum_
// SHA-1 digest bytes. kSha1DigestSize is declared outside this excerpt
// (the accompanying text gives the field as 20 bytes).
uint8_t signature_[kSha1DigestSize] = {
};
uint32_t file_size_ = 0; // size of entire file
uint32_t header_size_ = 0; // offset to start of next section
// Byte-order marker (the accompanying text gives the usual value as 0x12345678).
uint32_t endian_tag_ = 0;
uint32_t link_size_ = 0; // unused
uint32_t link_off_ = 0; // unused
uint32_t map_off_ = 0; // unused
// For the *_ids_ and class_defs_ pairs below, "_size_" is an element count
// (see each comment: "number of ..."), not a byte length; "_off_" is a file offset.
uint32_t string_ids_size_ = 0; // number of StringIds
uint32_t string_ids_off_ = 0; // file offset of StringIds array
uint32_t type_ids_size_ = 0; // number of TypeIds, we don't support more than 65535
uint32_t type_ids_off_ = 0; // file offset of TypeIds array
uint32_t proto_ids_size_ = 0; // number of ProtoIds, we don't support more than 65535
uint32_t proto_ids_off_ = 0; // file offset of ProtoIds array
uint32_t field_ids_size_ = 0; // number of FieldIds
uint32_t field_ids_off_ = 0; // file offset of FieldIds array
uint32_t method_ids_size_ = 0; // number of MethodIds
uint32_t method_ids_off_ = 0; // file offset of MethodIds array
uint32_t class_defs_size_ = 0; // number of ClassDefs
uint32_t class_defs_off_ = 0; // file offset of ClassDef array
// Unlike the counts above, data_size_ is a size of the section itself.
uint32_t data_size_ = 0; // size of data section
uint32_t data_off_ = 0; // file offset of data section
// Decode the dex magic version
uint32_t GetVersion() const;
};
magic: 占8个字节
,dex的标识,又叫魔数(magic number),其值大致为:dex\n035\0
checksum:占4个字节
,文件其余部分的Adler32校验和(除去magic、checksum)
signature:占20个字节
,文件其余部分的SHA-1签名(除去magic、checksum、signature)
file_size:占4个字节
,DEX文件长度(字节为单位)
header_size:占4个字节
,文件头长度(字节为单位)
endian_tag: 占4个字节
,字节顺序标记,通常值为0x12345678(通常为小端字节序)
link_size:占4个字节
,链接段大小,unused
link_off:占4个字节
,链接段的文件偏移量,unused
map_off:占4个字节
,映射表的文件偏移量,unused
string_ids_size:占4个字节
,字符串索引列表的大小
string_ids_off:占4个字节
,字符串索引列表的文件偏移量
type_ids_size:占4个字节
,类型索引列表大小
type_ids_off:占4个字节
,类型索引列表的文件偏移量
proto_ids_size:占4个字节
,方法原型列表大小
proto_ids_off:占4个字节
,方法原型索引列表的文件偏移量
field_ids_size:占4个字节
,字段索引列表大小
field_ids_off:占4个字节
,字段索引列表的文件偏移量
method_ids_size:占4个字节
,方法索引列表大小
method_ids_off:占4个字节
,方法索引列表的文件偏移量
class_defs_size:占4个字节
,类定义列表大小
class_defs_off:占4个字节
,类定义列表的文件偏移量
data_size:占4字节
,数据段大小(字节为单位)
data_off:占4字节
,数据段的文件偏移量
可以发现,header(DEX文件头)其实可以看做是一张地图,里面记录着各个区域的大小和偏移量,这些数据可以帮我们快速地找到这些区域的位置,从而去解析它们。
2.2. 010Editor解析
如图,仔细观察
2.3. header解析
使用java语言来解析DEX文件
主要解析函数如下
/**
 * Parses a DEX header from the given stream into a {@code DexHeader}.
 *
 * <p>Fields are consumed strictly in file order: magic (8 bytes), checksum (4 bytes),
 * signature (20 bytes), followed by twenty consecutive 4-byte values. Every 4-byte value
 * is decoded as little-endian.
 *
 * <p>{@code checksum} and {@code data_size} are stored in {@code long} properties, so the
 * decoded 32-bit value is masked before widening. Without the mask, values with the top
 * bit set would be sign-extended and appear negative.
 *
 * @param fis stream to read from; expected to be positioned at the first header byte
 * @return the populated header object
 */
private static DexHeader toParseDexHeader(FileInputStream fis) {
    DexHeader dexHeader = new DexHeader();

    // magic: 8 raw bytes
    dexHeader.setMagic(readData(fis, 8));
    // checksum: unsigned 32-bit, kept in a long -> mask to avoid sign extension
    dexHeader.setChecksum(readLittleEndianInt(fis) & 0xFFFFFFFFL);
    // signature: 20 raw bytes
    dexHeader.setSignature(readData(fis, 20));

    dexHeader.setFile_size(readLittleEndianInt(fis));
    dexHeader.setHeader_size(readLittleEndianInt(fis));
    dexHeader.setEndian_tag(readLittleEndianInt(fis));

    dexHeader.setLink_size(readLittleEndianInt(fis));
    dexHeader.setLink_off(readLittleEndianInt(fis));
    dexHeader.setMap_off(readLittleEndianInt(fis));

    dexHeader.setString_ids_size(readLittleEndianInt(fis));
    dexHeader.setString_ids_off(readLittleEndianInt(fis));
    dexHeader.setType_ids_size(readLittleEndianInt(fis));
    dexHeader.setType_ids_off(readLittleEndianInt(fis));
    dexHeader.setProto_ids_size(readLittleEndianInt(fis));
    dexHeader.setProto_ids_off(readLittleEndianInt(fis));
    dexHeader.setField_ids_size(readLittleEndianInt(fis));
    dexHeader.setField_ids_off(readLittleEndianInt(fis));
    dexHeader.setMethod_ids_size(readLittleEndianInt(fis));
    dexHeader.setMethod_ids_off(readLittleEndianInt(fis));
    dexHeader.setClass_defs_size(readLittleEndianInt(fis));
    dexHeader.setClass_defs_off(readLittleEndianInt(fis));

    // data_size: unsigned 32-bit, kept in a long -> mask to avoid sign extension
    dexHeader.setData_size(readLittleEndianInt(fis) & 0xFFFFFFFFL);
    dexHeader.setData_off(readLittleEndianInt(fis));

    return dexHeader;
}

/** Reads the next 4 bytes from the stream and decodes them as a little-endian int. */
private static int readLittleEndianInt(FileInputStream fis) {
    return NumConversion.byteToInt(readData(fis, 4), false);
}
/**
 * Reads up to {@code limit} bytes from the stream into a new array of exactly that length.
 *
 * <p>A single {@code read} call may deliver fewer bytes than requested, so this method keeps
 * reading until the array is full or the stream ends. If the stream ends early, or an
 * {@link IOException} occurs (its stack trace is printed), the bytes not read stay zero.
 *
 * @param is    stream to read from
 * @param limit number of bytes requested; also the length of the returned array
 * @return a new array of length {@code limit}
 */
public static byte[] readData(InputStream is, int limit) {
    byte[] buff = new byte[limit];
    int filled = 0;
    try {
        while (filled < limit) {
            int count = is.read(buff, filled, limit - filled);
            // -1 signals end of stream. 0 would violate the InputStream contract for a
            // non-empty request; stop rather than spin forever.
            if (count <= 0) {
                break;
            }
            filled += count;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return buff;
}
工具类:NumConversion
/**
 * Byte-level conversion helpers used when decoding binary file fields.
 */
public class NumConversion {

    /**
     * Renders a byte array as lower-case, two-digit hexadecimal values.
     *
     * @param bytes bytes to render
     * @param split separator inserted between consecutive bytes (not after the last one)
     * @return the hexadecimal text, or an empty string for an empty array
     */
    public static String byteToString(byte[] bytes, String split) {
        StringBuilder out = new StringBuilder();
        int lastIndex = bytes.length - 1;
        for (int idx = 0; idx <= lastIndex; idx++) {
            // Mask to 0..255 so negative Java bytes do not expand to eight hex digits.
            int unsigned = bytes[idx] & 0xFF;
            if (unsigned < 0x10) {
                out.append('0');
            }
            out.append(Integer.toHexString(unsigned));
            if (idx < lastIndex) {
                out.append(split);
            }
        }
        return out.toString();
    }

    /**
     * Combines exactly four bytes into an {@code int}.
     *
     * <p>Java's {@code byte} is signed (-128..127), so each byte is masked with
     * {@code & 0xFF} before shifting; otherwise sign extension would corrupt the
     * higher bits of the result.
     *
     * @param bytes       the four bytes to combine
     * @param isBigEndian {@code true} if {@code bytes[0]} is the most significant byte,
     *                    {@code false} if it is the least significant
     * @return the combined value, or {@code -1} when {@code bytes} is not 4 bytes long
     */
    public static int byteToInt(byte[] bytes, boolean isBigEndian) {
        if (bytes.length != 4) {
            return -1;
        }
        int value = 0;
        for (int pos = 0; pos < 4; pos++) {
            int shift = isBigEndian ? (3 - pos) * 8 : pos * 8;
            value |= (bytes[pos] & 0xFF) << shift;
        }
        return value;
    }
}
实体类DexHeader
/**
 * Mutable value holder for the fields of a DEX file header.
 *
 * <p>Each property corresponds to one header field. {@link #toString()} prints one field
 * per line: {@code magic} and {@code signature} as hex bytes; {@code endian_tag},
 * {@code map_off} and the {@code *_ids_off}, {@code class_defs_off} and {@code data_off}
 * values as upper-case hex with an {@code h} suffix; everything else in decimal.
 */
public class DexHeader {
    /** DEX magic bytes. */
    private byte[] magic;
    /** Adler32 checksum value. */
    private long checksum;
    /** SHA-1 signature bytes. */
    private byte[] signature;
    /** File size. */
    private int file_size;
    /** Header size. */
    private int header_size;
    /** Byte-order tag. */
    private int endian_tag;
    /** Link section size. */
    private int link_size;
    /** File offset of the link section. */
    private int link_off;
    /** File offset of the map list. */
    private int map_off;
    /** Size of the string index list. */
    private int string_ids_size;
    /** File offset of the string index list. */
    private int string_ids_off;
    /** Size of the type index list. */
    private int type_ids_size;
    /** File offset of the type index list. */
    private int type_ids_off;
    /** Size of the method prototype index list. */
    private int proto_ids_size;
    /** File offset of the method prototype index list. */
    private int proto_ids_off;
    /** Size of the field index list. */
    private int field_ids_size;
    /** File offset of the field index list. */
    private int field_ids_off;
    /** Size of the method index list. */
    private int method_ids_size;
    /** File offset of the method index list. */
    private int method_ids_off;
    /** Size of the class definition list. */
    private int class_defs_size;
    /** File offset of the class definition list. */
    private int class_defs_off;
    /** Data section size. */
    private long data_size;
    /** File offset of the data section. */
    private int data_off;

    public byte[] getMagic() { return magic; }

    public void setMagic(byte[] magic) { this.magic = magic; }

    public long getChecksum() { return checksum; }

    public void setChecksum(long checksum) { this.checksum = checksum; }

    public byte[] getSignature() { return signature; }

    public void setSignature(byte[] signature) { this.signature = signature; }

    public int getFile_size() { return file_size; }

    public void setFile_size(int file_size) { this.file_size = file_size; }

    public int getHeader_size() { return header_size; }

    public void setHeader_size(int header_size) { this.header_size = header_size; }

    public int getEndian_tag() { return endian_tag; }

    public void setEndian_tag(int endian_tag) { this.endian_tag = endian_tag; }

    public int getLink_size() { return link_size; }

    public void setLink_size(int link_size) { this.link_size = link_size; }

    public int getLink_off() { return link_off; }

    public void setLink_off(int link_off) { this.link_off = link_off; }

    public int getMap_off() { return map_off; }

    public void setMap_off(int map_off) { this.map_off = map_off; }

    public int getString_ids_size() { return string_ids_size; }

    public void setString_ids_size(int string_ids_size) { this.string_ids_size = string_ids_size; }

    public int getString_ids_off() { return string_ids_off; }

    public void setString_ids_off(int string_ids_off) { this.string_ids_off = string_ids_off; }

    public int getType_ids_size() { return type_ids_size; }

    public void setType_ids_size(int type_ids_size) { this.type_ids_size = type_ids_size; }

    public int getType_ids_off() { return type_ids_off; }

    public void setType_ids_off(int type_ids_off) { this.type_ids_off = type_ids_off; }

    public int getProto_ids_size() { return proto_ids_size; }

    public void setProto_ids_size(int proto_ids_size) { this.proto_ids_size = proto_ids_size; }

    public int getProto_ids_off() { return proto_ids_off; }

    public void setProto_ids_off(int proto_ids_off) { this.proto_ids_off = proto_ids_off; }

    public int getField_ids_size() { return field_ids_size; }

    public void setField_ids_size(int field_ids_size) { this.field_ids_size = field_ids_size; }

    public int getField_ids_off() { return field_ids_off; }

    public void setField_ids_off(int field_ids_off) { this.field_ids_off = field_ids_off; }

    public int getMethod_ids_size() { return method_ids_size; }

    public void setMethod_ids_size(int method_ids_size) { this.method_ids_size = method_ids_size; }

    public int getMethod_ids_off() { return method_ids_off; }

    public void setMethod_ids_off(int method_ids_off) { this.method_ids_off = method_ids_off; }

    public int getClass_defs_size() { return class_defs_size; }

    public void setClass_defs_size(int class_defs_size) { this.class_defs_size = class_defs_size; }

    public int getClass_defs_off() { return class_defs_off; }

    public void setClass_defs_off(int class_defs_off) { this.class_defs_off = class_defs_off; }

    public long getData_size() { return data_size; }

    public void setData_size(long data_size) { this.data_size = data_size; }

    public int getData_off() { return data_off; }

    public void setData_off(int data_off) { this.data_off = data_off; }

    /** Formats a value as upper-case hexadecimal followed by {@code h}. */
    private static String asHex(int value) {
        return Integer.toHexString(value).toUpperCase() + "h";
    }

    /**
     * Renders every header field on its own line inside a {@code DexHeader{...}} wrapper.
     *
     * @return the multi-line textual form of this header
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("DexHeader{").append("\n");
        text.append("magic=").append(NumConversion.byteToString(magic, " ")).append("\n");
        text.append(", checksum=").append(checksum).append("\n");
        text.append(", signature=").append(NumConversion.byteToString(signature, "")).append("\n");
        text.append(", file_size=").append(file_size).append("\n");
        text.append(", header_size=").append(header_size).append("\n");
        text.append(", endian_tag=").append(asHex(endian_tag)).append("\n");
        text.append(", link_size=").append(link_size).append("\n");
        text.append(", link_off=").append(link_off).append("\n");
        text.append(", map_off=").append(asHex(map_off)).append("\n");
        text.append(", string_ids_size=").append(string_ids_size).append("\n");
        text.append(", string_ids_off=").append(asHex(string_ids_off)).append("\n");
        text.append(", type_ids_size=").append(type_ids_size).append("\n");
        text.append(", type_ids_off=").append(asHex(type_ids_off)).append("\n");
        text.append(", proto_ids_size=").append(proto_ids_size).append("\n");
        text.append(", proto_ids_off=").append(asHex(proto_ids_off)).append("\n");
        text.append(", field_ids_size=").append(field_ids_size).append("\n");
        text.append(", field_ids_off=").append(asHex(field_ids_off)).append("\n");
        text.append(", method_ids_size=").append(method_ids_size).append("\n");
        text.append(", method_ids_off=").append(asHex(method_ids_off)).append("\n");
        text.append(", class_defs_size=").append(class_defs_size).append("\n");
        text.append(", class_defs_off=").append(asHex(class_defs_off)).append("\n");
        text.append(", data_size=").append(data_size).append("\n");
        text.append(", data_off=").append(asHex(data_off)).append("\n");
        text.append('}');
        return text.toString();
    }
}
asjhan for Android reverse