IO复用——poll机制内核源代码剖析

*select内核源代码已经剖析了,但是有个问题还没有解决。。。面对每一种文件描述符如何进行查询?这就和poll机制有关了。。。这篇就来看看poll机制内核源代码。。。→_→*

了解select底层实现请戳传送门——IO复用——select内核源代码剖析

每一个进程都会有一个与之对应的files_struct结构,files_struct结构中存储着该进程打开的文件的集合

// Per-process bookkeeping for open files: descriptor bitmaps plus the
// array of struct file pointers that the descriptors index.
// NOTE(review): the meaning of count/max_fds/max_fdset/next_fd is not shown in
// this excerpt; confirm against the kernel source before relying on it.
struct files_struct {
    atomic_t count;
    rwlock_t file_lock;
    int max_fds;
    int max_fdset;
    int next_fd;
    struct file ** fd;  /* current fd array */ // per the original note, fd points at fd_array below
    fd_set *close_on_exec;
    fd_set *open_fds;
    fd_set close_on_exec_init;
    fd_set open_fds_init;
    struct file * fd_array[NR_OPEN_DEFAULT]; // built-in array of NR_OPEN_DEFAULT open-file pointers for this process
};

每一个文件都有与之对应的file_operations类型的文件操作方法

// linux-2.4.0/include/linux/fs.h
// One open file. The select scan loop obtains a struct file for each
// descriptor and dispatches through its f_op table.
struct file {
    struct list_head    f_list;
    struct dentry       *f_dentry; // NOTE(review): the original note says the file's wait queue is reached via f_dentry->d_inode; the code shown here only establishes that the queue is whatever head the file's poll hook hands to poll_wait() - confirm
    struct vfsmount         *f_vfsmnt;
    struct file_operations  *f_op; // operations table; select tests f_op and f_op->poll before invoking the poll hook
    atomic_t        f_count;
    unsigned int        f_flags;
    mode_t          f_mode;
    loff_t          f_pos;
    unsigned long       f_reada, f_ramax, f_raend, f_ralen, f_rawin;
    struct fown_struct  f_owner;
    unsigned int        f_uid, f_gid;
    int         f_error;

    unsigned long       f_version;

    /* needed for tty driver, and maybe others */
    void            *private_data;
};

对文件的操作方法有很多种,而每一种实现在机制上都使用回调函数的方法,其中就包括一种poll操作的回调函数

// linux-2.4.0/include/linux/fs.h
// Table of callbacks implementing the operations on one kind of file.
// Each member is a function pointer supplied by that file type.
struct file_operations {
    struct module *owner;
    loff_t (*llseek) (struct file *, loff_t, int);
    ssize_t (*read) (struct file *, char *, size_t, loff_t *);
    ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
    int (*readdir) (struct file *, void *, filldir_t);
    unsigned int (*poll) (struct file *, struct poll_table_struct *); // poll hook; its implementation is specific to the file type and its return value is used as the event mask. When this pointer is NULL the select loop does not call it and uses DEFAULT_POLLMASK instead
    int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
    int (*mmap) (struct file *, struct vm_area_struct *);
    int (*open) (struct inode *, struct file *);
    int (*flush) (struct file *);
    int (*release) (struct inode *, struct file *);
    int (*fsync) (struct file *, struct dentry *, int datasync);
    int (*fasync) (int, struct file *, int);
    int (*lock) (struct file *, int, struct file_lock *);
    ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
    ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
};

每一种文件的poll操作回调函数都会调用poll_wait函数,目的是把监听该文件的进程所对应的wait_queue_t结构添加到该文件的等待队列中

// linux-2.4.0/include/linux/poll.h
// Register the caller on a wait queue on behalf of a poll hook.
//   filp         - the file being polled
//   wait_address - head of the wait queue to join
//   p            - the caller's poll_table; NULL means "do not register"
// Does nothing when either p or wait_address is NULL; otherwise the work
// is delegated to __pollwait().
extern inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
    if (!p || !wait_address)
        return;

    __pollwait(filp, wait_address, p);
}
// linux-2.4.0/fs/select.c
// Take one poll_table_entry from the poll_table `p`, fill it in for
// (filp, wait_address) and link it onto that wait queue.
//   filp         - file being polled; a reference is taken with get_file()
//   wait_address - wait queue head the entry is added to
//   p            - poll_table owning the chain of poll_table_page pages
// If a new page is needed and cannot be allocated, p->error is set to
// -ENOMEM, the task state is put back to TASK_RUNNING, and nothing is queued.
void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
    // Head of the page chain; the page from which the next entry is taken.
    struct poll_table_page *page = p->table;
    struct poll_table_entry *slot;

    // No page yet, or the head page has no free entry: grab one more page.
    if (!page || POLL_TABLE_FULL(page)) {
        struct poll_table_page *fresh =
            (struct poll_table_page *) __get_free_page(GFP_KERNEL);

        if (!fresh) {
            p->error = -ENOMEM;
            __set_current_state(TASK_RUNNING);
            return;
        }
        // The first free entry of a new page is the start of its entries array.
        fresh->entry = fresh->entries;
        // Push the new page onto the front of the chain and use it from now on.
        fresh->next = page;
        p->table = fresh;
        page = fresh;
    }

    /* Add a new entry */
    // Claim the first free entry and advance the page's free pointer.
    slot = page->entry;
    page->entry = slot + 1;

    // Take a reference on the file and record it in the entry.
    get_file(filp);
    slot->filp = filp;
    // Remember which queue the entry is linked on.
    slot->wait_address = wait_address;
    // Bind the entry's wait-queue node to the current task, then enqueue it
    // on the file's wait queue.
    init_waitqueue_entry(&slot->wait, current);
    add_wait_queue(wait_address, &slot->wait);
}
// linux-2.4.0/include/linux/wait.h
// Initialise a wait-queue node: clear its flags and point it at task `p`.
// Storing the task_struct pointer in q->task lets code that walks the queue
// find the task belonging to each node.
// With WAITQUEUE_DEBUG enabled, a NULL q or p triggers WQ_BUG() and the
// node's __magic field is set to its own address.
static inline void init_waitqueue_entry(wait_queue_t *q,
                 struct task_struct *p)
{
#if WAITQUEUE_DEBUG
    if (!(q && p))
        WQ_BUG();
#endif
    q->task = p;    // owner of this node (callers in this file pass `current`)
    q->flags = 0;   // start with no flags set
#if WAITQUEUE_DEBUG
    q->__magic = (long)&q->__magic;
#endif
}
// linux-2.4.0/kernel/fork.c
// Link wait-queue node `wait` onto queue `q`.
// The node's flags are reset to 0 and the insertion is done by
// __add_wait_queue(), both while holding q->lock via the
// wq_write_lock_irqsave / wq_write_unlock_irqrestore pair.
void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
    unsigned long irqstate;   // state handed to the lock/unlock pair

    wq_write_lock_irqsave(&q->lock, irqstate);
    wait->flags = 0;
    __add_wait_queue(q, wait);
    wq_write_unlock_irqrestore(&q->lock, irqstate);
}

通过对Linux内核源代码的剖析,我们对poll机制已经有了深入的了解,现在再回到select中,解决最后的问题

//linux-2.4.0\fs\Select.c(部分截取)
    ......
    // Scan loop of select (excerpt; the enclosing function is elided).
    // Each pass polls every requested descriptor, records ready ones in the
    // result sets, and then either breaks out or sleeps in schedule_timeout().
    for (;;) {
        // Set the task state before scanning; presumably so that a wake-up
        // arriving during the scan is not lost - TODO confirm.
        set_current_state(TASK_INTERRUPTIBLE);
        for (i = 0 ; i < n; i++) {
            unsigned long bit = BIT(i);
            unsigned long mask;
            struct file *file; // struct file behind descriptor i

            off = i / __NFDBITS;
            // Skip descriptors not requested in the fds sets.
            if (!(bit & BITS(fds, off)))
                continue;
            file = fget(i); // look up the struct file for descriptor i; balanced by fput() below
            mask = POLLNVAL; // used when the descriptor has no file
            if (file) {
                mask = DEFAULT_POLLMASK; // used when the file has no poll hook
                // Call the hook only if the file has an operations table and that table provides poll.
                if (file->f_op && file->f_op->poll)
                    // Invoke the file type's poll hook with the file and `wait` (the hook's poll_table argument); mask takes its return value.
                    // This is the "query" step mentioned in the select analysis: the real readiness check is done by the poll hook.
                    mask = file->f_op->poll(file, wait);
                fput(file);
            }
            // For each of the in/out/exception sets: if the mask reports the
            // event and the descriptor was requested in that set, mark it in the
            // result set and count it. wait is then cleared, so later poll calls
            // in this pass get a NULL table (poll_wait() does nothing for NULL).
            if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
                SET(bit, __RES_IN(fds,off));
                retval++;
                wait = NULL;
            }
            if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
                SET(bit, __RES_OUT(fds,off));
                retval++;
                wait = NULL;
            }
            if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
                SET(bit, __RES_EX(fds,off));
                retval++;
                wait = NULL;
            }
        }
        // After the first full pass, later passes poll with a NULL table.
        wait = NULL;
        // Stop when something is ready, the timeout is zero, or a signal is pending.
        if (retval || !__timeout || signal_pending(current))
            break;
        // Propagate an error recorded in table.error (__pollwait sets -ENOMEM there).
        if(table.error) {
            retval = table.error;
            break;
        }
        // Sleep; the return value becomes the new timeout for the next pass.
        __timeout = schedule_timeout(__timeout);
    }
    ......

*下一篇就来总结epoll喽。。。睡觉。。。→_→*

猜你喜欢

转载自blog.csdn.net/kongkongkkk/article/details/77273199