<linux>from lvm2 to device mapper


 了解分布式存储的朋友 一定知道 lvm2 , PV LV VG等 ,简单看看lvm2是如何和内核交互的,为下一步开发自己的lvm 做准备

首先看 lvcreate 的调用走向  希望你自己摸索过lvm 也熟悉vfs ,fs子系统 ,这样可以一看就知道什么意思,然后大家一起交流。

lvm: dev_manager.c

/*
 * Add LV and any known dependencies
 *
 * Excerpt: only the call that registers the LV itself is shown; the
 * sections marked "//..." were left out by the article.
 *
 * Returns 1 on success.  When _add_dev_to_dtree() fails the function
 * leaves through return_0 (presumably an lvm2 macro that returns 0 -
 * TODO confirm against the lvm2 headers).
 */
static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree, struct logical_volume *lv)
{
//...
	if (!_add_dev_to_dtree(dm, dtree, lv, NULL))
		return_0;

//...
	return 1;
}

 _add_dev_to_dtree ---》int _info() ---》 dm_task_run()

然后就是 dm 库

/*
 * Excerpt of dm_task_run(): the retry loop around _do_dm_ioctl().
 * Local declarations and the rest of the body are elided ("//...").
 */
int dm_task_run(struct dm_task *dmt)
{


repeat_ioctl:/* this is the key part: the ioctl is issued from here */
	if (!(dmi = _do_dm_ioctl(dmt, command, _ioctl_buffer_double_factor)))
		return 0;
	if (dmi->flags & DM_BUFFER_FULL_FLAG) {
		switch (dmt->type) {
		case DM_DEVICE_LIST_VERSIONS:
		case DM_DEVICE_LIST:
		case DM_DEVICE_DEPS:
		case DM_DEVICE_STATUS:
		case DM_DEVICE_TABLE:
		case DM_DEVICE_WAITEVENT:
			_ioctl_buffer_double_factor++;
			dm_free(dmi);
			goto repeat_ioctl;/* buffer was full: bump the factor and issue the ioctl again */
		default:
			log_error("WARNING: libdevmapper buffer too small for data");
		}
//...


}
 
/*
 * Build the ioctl argument for a task and issue the ioctl on _control_fd.
 *
 * dmt          - task to run
 * command      - ioctl request number, passed unchanged to ioctl()
 * repeat_count - forwarded to _flatten(); dm_task_run() passes
 *                _ioctl_buffer_double_factor here
 *
 * Returns the dm_ioctl buffer on success.  An ENXIO failure for an
 * INFO, MKNODES or STATUS task is not treated as an error: the buffer is
 * still returned, with DM_EXISTS_FLAG cleared.  Any other ioctl failure
 * is logged, the buffer is released with dm_free() and NULL is returned.
 * NULL is also returned when _flatten() cannot build the argument.
 */
static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
				     unsigned repeat_count)
{
	struct dm_ioctl *dmi;

	dmi = _flatten(dmt, repeat_count);/* turn the dm_task into the dm_ioctl argument (original note: dm_task field validity check) */
	if (!dmi) {
		log_error("Couldn't create ioctl argument.");
		return NULL;
	}

	if (dmt->type == DM_DEVICE_TABLE)
		dmi->flags |= DM_STATUS_TABLE_FLAG;

	dmi->flags |= DM_EXISTS_FLAG;	/* FIXME */

	if (dmt->no_open_count)
		dmi->flags |= DM_SKIP_BDGET_FLAG;

//...
#ifdef DM_IOCTLS
	if (ioctl(_control_fd, command, dmi) < 0) {/* note the dmi argument handed to the kernel here */
		if (errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) ||
				       (dmt->type == DM_DEVICE_MKNODES) ||
				       (dmt->type == DM_DEVICE_STATUS)))
			dmi->flags &= ~DM_EXISTS_FLAG;	/* FIXME */
		else {
			if (_log_suppress)
				log_verbose("device-mapper: %s ioctl "
					    "failed: %s",
				    	    _cmd_data_v4[dmt->type].name,
					    strerror(errno));
			else
				log_error("device-mapper: %s ioctl "
					  "failed: %s",
				    	   _cmd_data_v4[dmt->type].name,
					  strerror(errno));
			dm_free(dmi);
			return NULL;
		}
	}
#else /* Userspace alternative for testing */
#endif
	return dmi;
}
 
 我把关键的这行扣出来

if (ioctl(_control_fd, command, dmi) < 0)
 

看一下 command的取值:

/*
 * Excerpt of dm_task_run(): how the ioctl request number is chosen.
 * Everything else in the body is elided ("//...").
 */
int dm_task_run(struct dm_task *dmt)
{
	struct dm_ioctl *dmi;
	unsigned command;
//...
/* the task type indexes _cmd_data_v4; its cmd field is the ioctl number */
command = _cmd_data_v4[dmt->type].cmd;
//...
}

对应一个全局数组

/*
 * Per-task-type command table.  It is indexed by dmt->type (see
 * dm_task_run), so the order of the entries is significant.
 * Each entry is { name, ioctl request number, version triple }.
 * Several task types share one ioctl: "suspend" and "resume" both use
 * DM_DEV_SUSPEND, "info" and "mknodes" both use DM_DEV_STATUS, and
 * "status" and "table" both use DM_TABLE_STATUS.
 * The last three entries are compiled in only when the corresponding
 * ioctl macro is defined.
 */
static struct cmd_data _cmd_data_v4[] = {
	{"create",	DM_DEV_CREATE,		{4, 0, 0}},
	{"reload",	DM_TABLE_LOAD,		{4, 0, 0}},
	{"remove",	DM_DEV_REMOVE,		{4, 0, 0}},
	{"remove_all",	DM_REMOVE_ALL,		{4, 0, 0}},
	{"suspend",	DM_DEV_SUSPEND,		{4, 0, 0}},
	{"resume",	DM_DEV_SUSPEND,		{4, 0, 0}},
	{"info",	DM_DEV_STATUS,		{4, 0, 0}},
	{"deps",	DM_TABLE_DEPS,		{4, 0, 0}},
	{"rename",	DM_DEV_RENAME,		{4, 0, 0}},
	{"version",	DM_VERSION,		{4, 0, 0}},
	{"status",	DM_TABLE_STATUS,	{4, 0, 0}},
	{"table",	DM_TABLE_STATUS,	{4, 0, 0}},
	{"waitevent",	DM_DEV_WAIT,		{4, 0, 0}},
	{"names",	DM_LIST_DEVICES,	{4, 0, 0}},
	{"clear",	DM_TABLE_CLEAR,		{4, 0, 0}},
	{"mknodes",	DM_DEV_STATUS,		{4, 0, 0}},
#ifdef DM_LIST_VERSIONS
	{"versions",	DM_LIST_VERSIONS,	{4, 1, 0}},
#endif
#ifdef DM_TARGET_MSG
	{"message",	DM_TARGET_MSG,		{4, 2, 0}},
#endif
#ifdef DM_DEV_SET_GEOMETRY
	{"setgeometry",	DM_DEV_SET_GEOMETRY,	{4, 6, 0}},
#endif
};
 

每一项的第一个字段就是在 shell 里输入的 dmsetup 子命令名,关键是第二个字段 cmd,结构体定义如下:

struct cmd_data {
	const char *name;
	const int cmd;
	const int version[3];
};
具体 struct  cmd_data ->cmd 取值 就是下面的宏
/*
* DM_VERSION:
* Just get the version information for the ioctl interface.
*
* DM_REMOVE_ALL:
* Remove all dm devices, destroy all tables. Only really used
* for debug.
*
* DM_LIST_DEVICES:
* Get a list of all the dm device names.
*
* DM_DEV_CREATE:
* Create a new device, neither the 'active' or 'inactive' table
* slots will be filled. The device will be in suspended state
* after creation, however any io to the device will get errored
* since it will be out-of-bounds.
*
* DM_DEV_REMOVE:
* Remove a device, destroy any tables.
*
* DM_DEV_RENAME:
* Rename a device.
*
* DM_SUSPEND:
* This performs both suspend and resume, depending which flag is
* passed in.
* Suspend: This command will not return until all pending io to
* the device has completed. Further io will be deferred until
* the device is resumed.
* Resume: It is no longer an error to issue this command on an
* unsuspended device. If a table is present in the 'inactive'
* slot, it will be moved to the active slot, then the old table
* from the active slot will be _destroyed_. Finally the device
* is resumed.
*
* DM_DEV_STATUS:
* Retrieves the status for the table in the 'active' slot.
*
* DM_DEV_WAIT:
* Wait for a significant event to occur to the device. This
* could either be caused by an event triggered by one of the
* targets of the table in the 'active' slot, or a table change.
*
* DM_TABLE_LOAD:
* Load a table into the 'inactive' slot for the device. The
* device does _not_ need to be suspended prior to this command.
*
* DM_TABLE_CLEAR:
* Destroy any table in the 'inactive' slot (ie. abort).
*
* DM_TABLE_DEPS:
* Return a set of device dependencies for the 'active' table.
*
* DM_TABLE_STATUS:
* Return the targets status for the 'active' table.
*
* DM_TARGET_MSG:
* Pass a message string to the target at a specific offset of a device.
*
* DM_DEV_SET_GEOMETRY:
* Set the geometry of a device by passing in a string in this format:
*
* "cylinders heads sectors_per_track start_sector"
*
* Beware that CHS geometry is nearly obsolete and only provided
* for compatibility with dm devices that can be booted by a PC
* BIOS. See struct hd_geometry for range limits. Also note that
* the geometry is erased if the device size changes.
*/

接着往下看

/*
 * Each DM_* ioctl number is built with _IOWR from the DM_IOCTL type,
 * the matching *_CMD sequence number and struct dm_ioctl.
 */
#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
/*.....................*/

/* _IOWR: read+write direction; the third argument is a type, and its sizeof() is what gets encoded */
#define _IOWR(type,nr,size)	_IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
 

比如我们通过shell 调用[root]\> dmsetup create kko txt

 写道
(gdb) p *dmi
$5 = {version = {4, 0, 0}, data_size = 16384, data_start = 312, target_count = 0, open_count = 0,
flags = 4, event_nr = 0, padding = 0, dev = 0, name = "kko", '\000' <repeats 124 times>,
uuid = '\000' <repeats 128 times>, data = "\000\000\000\000\000\000"}
(gdb) p command
$6 = 3241737475
 

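 3241737475 就是 0xC138FD03。按 _IOC 的编码拆开看:最高 2 位 11 表示 _IOC_READ|_IOC_WRITE;接下来的 14 位 0x138 = 312 是 sizeof(struct dm_ioctl)(与上面 gdb 里的 data_start = 312 一致);0xFD 是 type;最低字节 0x03 是命令序号 nr。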

在 dm-ioctl.h 里面可以看到

#define DM_IOCTL 0xfd

关键的 _IOWR() 里 nr 参数(命令序号)就来自下面这个枚举

/*
 * Sequence numbers used as the nr argument of _IOWR for each dm ioctl.
 * The values are consecutive starting at 0, so DM_DEV_CREATE_CMD is 3,
 * DM_DEV_SUSPEND_CMD is 6 and DM_TABLE_LOAD_CMD is 9.  lookup_ioctl()
 * indexes its handler table with these values, so the order matters.
 */
enum {
	/* Top level cmds */
	DM_VERSION_CMD = 0,
	DM_REMOVE_ALL_CMD,
	DM_LIST_DEVICES_CMD,

	/* device level cmds */
	DM_DEV_CREATE_CMD,
	DM_DEV_REMOVE_CMD,
	DM_DEV_RENAME_CMD,
	DM_DEV_SUSPEND_CMD,
	DM_DEV_STATUS_CMD,
	DM_DEV_WAIT_CMD,

	/* Table level cmds */
	DM_TABLE_LOAD_CMD,
	DM_TABLE_CLEAR_CMD,
	DM_TABLE_DEPS_CMD,
	DM_TABLE_STATUS_CMD,

	/* Added later */
	DM_LIST_VERSIONS_CMD,
	DM_TARGET_MSG_CMD,
	DM_DEV_SET_GEOMETRY_CMD
};

所以DM_DEV_CREATE 就是 03

明白这个几个宏之后 , 就去内核看看处理

device_mapper 主要的代码都在  /drivers/md 里面

首先看一下和上面 ioctl 一一对应的一个结构体 

1 先注册fs 的操作类

/*
 * File operations of the device-mapper control device.  An ioctl() on
 * that device enters the kernel through dm_ctl_ioctl (or through
 * dm_compat_ctl_ioctl on the compat path).
 */
static const struct file_operations _ctl_fops = {
	.open = nonseekable_open,
	.unlocked_ioctl	 = dm_ctl_ioctl,	/* ioctl entry point */
	.compat_ioctl = dm_compat_ctl_ioctl,	/* compat ioctl entry point */
	.owner	 = THIS_MODULE,
	.llseek  = noop_llseek,
};

当用户调用 ioctl 的时候(上面分析过) 通过字符设备  就会调用内核的.unlocked_ioctl

2 接下来就是dm_ctl_ioctl()->ctl_ioctl() -> lookup_ioctl()

/*
 * Map an ioctl command number to its handler.
 *
 * The table is indexed directly by cmd, so its rows must stay in the
 * same order as the DM_*_CMD enumeration.  Returns NULL when cmd lies
 * past the end of the table; DM_VERSION_CMD also yields NULL because
 * its row carries no handler.
 */
static ioctl_fn lookup_ioctl(unsigned int cmd)
{
	static struct {
		int cmd;
		ioctl_fn fn;
	} handlers[] = {
		/* top level; version is dealt with elsewhere */
		{ .cmd = DM_VERSION_CMD,          .fn = NULL },
		{ .cmd = DM_REMOVE_ALL_CMD,       .fn = remove_all },
		{ .cmd = DM_LIST_DEVICES_CMD,     .fn = list_devices },

		/* device level */
		{ .cmd = DM_DEV_CREATE_CMD,       .fn = dev_create },
		{ .cmd = DM_DEV_REMOVE_CMD,       .fn = dev_remove },
		{ .cmd = DM_DEV_RENAME_CMD,       .fn = dev_rename },
		{ .cmd = DM_DEV_SUSPEND_CMD,      .fn = dev_suspend },
		{ .cmd = DM_DEV_STATUS_CMD,       .fn = dev_status },
		{ .cmd = DM_DEV_WAIT_CMD,         .fn = dev_wait },

		/* table level */
		{ .cmd = DM_TABLE_LOAD_CMD,       .fn = table_load },
		{ .cmd = DM_TABLE_CLEAR_CMD,      .fn = table_clear },
		{ .cmd = DM_TABLE_DEPS_CMD,       .fn = table_deps },
		{ .cmd = DM_TABLE_STATUS_CMD,     .fn = table_status },

		/* added later */
		{ .cmd = DM_LIST_VERSIONS_CMD,    .fn = list_versions },
		{ .cmd = DM_TARGET_MSG_CMD,       .fn = target_message },
		{ .cmd = DM_DEV_SET_GEOMETRY_CMD, .fn = dev_set_geometry }
	};

	if (cmd >= ARRAY_SIZE(handlers))
		return NULL;

	return handlers[cmd].fn;
}
 

按上面的 shell [root]\>dmsetup create xxx

   gdb  

p command /*第一次调用ioctl(,command,)*/
$1 = 3241737475 /*枚举 03: DM_DEV_CREATE_CMD,*/

   就会去调用 dev_create 

/*
 * DM_DEV_CREATE_CMD handler: create a mapped_device and register it
 * under the name (and optional uuid) carried in param.
 *
 * Returns 0 on success, otherwise the error reported by check_name(),
 * dm_create() or dm_hash_insert().  When the hash insertion fails the
 * new device is put and destroyed before returning.
 */
static int dev_create(struct dm_ioctl *param, size_t param_size)
{
	int r, m = DM_ANY_MINOR;
	struct mapped_device *md;

	r = check_name(param->name);/* this is the name given after "dmsetup create" */
	if (r)
		return r;

	/*
	 * m stays DM_ANY_MINOR (presumably: let the kernel pick an unused
	 * minor number) unless the caller asked for a specific one.
	 */
	if (param->flags & DM_PERSISTENT_DEV_FLAG)
		m = MINOR(huge_decode_dev(param->dev));

	r = dm_create(m, &md);/* set up the mapped_device */
	if (r)
		return r;
	/* original note: hashes the mapped_device into the global static struct list_head _name_buckets[NUM_BUCKETS] */
	r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
	if (r) {
		dm_put(md);
		dm_destroy(md);
		return r;
	}

	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;

	__dev_status(md, param);

	dm_put(md);

	return 0;
}

接着gdb  就会发现第二次调用 ioctl 的地方

gdb 
p command /*第二次调用ioctl(,command,)*/
$2 = 3241737481 /*C138FD09 枚举 09: DM_TABLE_LOAD_CMD*/
/*
 * DM_TABLE_LOAD_CMD handler: build a new table from the ioctl parameters
 * and stage it in the device's hash cell as hc->new_map.
 *
 * Returns 0 on success.  Returns -ENXIO when the device cannot be found
 * or is no longer in the hash, -EINVAL when the new table's type differs
 * from the type the device already has, and otherwise the error from
 * dm_table_create(), populate_table() or dm_setup_md_queue().
 * Every path after a successful find_device() leaves through "out",
 * where dm_put(md) is called.
 */
static int table_load(struct dm_ioctl *param, size_t param_size)
{
	int r;
	struct hash_cell *hc;
	struct dm_table *t;
	struct mapped_device *md;
	/* look up the mapped_device this request refers to */
	md = find_device(param);
	if (!md)
		return -ENXIO;
	/* create a dm_table structure */
	r = dm_table_create(&t, get_mode(param), param->target_count, md);
	if (r)
		goto out;
	/*
	 * Fill the table from the ioctl parameters.  Original note: the
	 * dm_target structures are set up through dm_table_add_target and
	 * kept in a btree belonging to this md (not visible in this excerpt).
	 */
	r = populate_table(t, param, param_size);
	if (r) {
		dm_table_destroy(t);
		goto out;
	}

	/* Protect md->type and md->queue against concurrent table loads. */
	dm_lock_md_type(md);
	if (dm_get_md_type(md) == DM_TYPE_NONE)
		/* Initial table load: acquire type of table. */
		dm_set_md_type(md, dm_table_get_type(t));
	else if (dm_get_md_type(md) != dm_table_get_type(t)) {
		DMWARN("can't change device type after initial table load.");
		dm_table_destroy(t);
		dm_unlock_md_type(md);
		r = -EINVAL;
		goto out;
	}

	/* setup md->queue to reflect md's type (may block) */
	r = dm_setup_md_queue(md);/* set up the device's request queue */
	if (r) {
		DMWARN("unable to set up device queue for new table.");
		dm_table_destroy(t);
		dm_unlock_md_type(md);
		goto out;
	}
	dm_unlock_md_type(md);

	/* stage inactive table */
	down_write(&_hash_lock);
	hc = dm_get_mdptr(md);
	if (!hc || hc->md != md) {
		DMWARN("device has been removed from the dev hash table.");
		dm_table_destroy(t);
		up_write(&_hash_lock);
		r = -ENXIO;
		goto out;
	}

	/* a previously staged table that was never activated is dropped */
	if (hc->new_map)
		dm_table_destroy(hc->new_map);
	hc->new_map = t;/* keep the new table in the hash cell */
	up_write(&_hash_lock);

	param->flags |= DM_INACTIVE_PRESENT_FLAG;
	__dev_status(md, param);

out:
	dm_put(md);

	return r;
}
 

接着gdb  就会发现第三次调用 ioctl 的地方

 

 

gdb 
p command /*第三次调用ioctl(,command,)*/
$3 = 3241737478 /*C138FD06 枚举 06: DM_DEV_SUSPEND_CMD*/
 

 static int dev_suspend(struct dm_ioctl *param, size_t param_size)

{
	if (param->flags & DM_SUSPEND_FLAG)
		return do_suspend(param);
	/*用新的 dm_table 激活mapped device*/
	return do_resume(param);
}
 


=======================================================

这里补充一下关于 lvm2 伟大的逻辑卷管理:

大家都知道 lvm2 会把自己的标签(label)和卷组元数据写在 PV 的开头部分(标签默认位于第二个 512 字节扇区,而不是最开始的 512 字节)。 主要通过  dev-io.c

的 _io() 方法,操作原始磁盘 这块尤其重要 这个后面再看。

static int _io(struct device_area *where, void *buffer, int should_write)
 

而所有的备份、灾难恢复的秘密都在 archive.c 文件中

外部的接口主要有: 

int vgcfgrestore(struct cmd_context *cmd, int argc, char **argv)

—>

int archive_display(struct cmd_context *cmd, const char *vg_name)

--->

int archive_list(struct cmd_context *cmd, const char *dir, const char *vgname)

--->

/*
 * Returns a list of archive_files.
 */
static struct list *_scan_archive(struct dm_pool *mem,
				  const char *vgname, const char *dir)

  明白了吧 。所以你可以在 [root]\> 输入 

 
[root@localhost home]# vgcfgrestore
Please specify a *single* volume group to restore.
 

别的关于 vgcfg 大家可以自己看

对于备份  vgcfgbackup

主要的流程就是 

--->int process_each_vg()

然后会去找当前有效的 VG,再通过 _process_one_vg 逐个处理,接下来就是通过

->>> 

vg = vg_read(cmd, vg_name, vgid, &consistent)

(这里的 struct cmd_context *cmd 里面记录了配置、备份参数和各个目录路径等信息:)

/*
 * Per-command context handed to the lvm2 tools (excerpt; leading
 * members are elided).  Holds configuration trees, archive/backup
 * parameters, tags and the base directory paths.
 */
struct cmd_context {
	//....
	struct list config_files;
	int config_valid;
	struct config_tree *cft;
	struct config_tree *cft_override;
	struct config_info default_settings;
	struct config_info current_settings;

	struct archive_params *archive_params;
	struct backup_params *backup_params;

	/* List of defined tags */
	struct list tags;
	int hosttags;
/* directory paths */
	char sys_dir[PATH_MAX];/* typically /etc/lvm */
	char dev_dir[PATH_MAX];/* typically /dev/ */
	char proc_dir[PATH_MAX];/* typically /proc */
};
 

关键的部分到了

->>> 

archiver.c:

 int backup(struct volume_group *vg)

            int __backup(struct volume_group *vg)

                backup_to_file(name, desc, vg);

好的 lvm 的基本 skeleton 就这样了。。  别的都能看懂, 我不分析了 

猜你喜欢

转载自sunzixun.iteye.com/blog/993182