禁用于任何商业用途。
msn: [email protected]
来源:http://yfydz.cublog.cn
5.3 连接
连接通常是针对客户端连接服务器

// Handle connect() on a netlink socket: remember the destination pid/group
// taken from addr so later sends without an explicit address use them.
// Returns 0 on success, -EINVAL for an address family other than
// AF_UNSPEC/AF_NETLINK, -EPERM when groups are requested and
// netlink_capable() refuses, or the error returned by netlink_autobind().
static int netlink_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;

	if (addr->sa_family == AF_UNSPEC) {
		// Destination family is AF_UNSPEC (unspecified): reset the socket
		// to the unconnected state, clear the destination and report success.
		sk->sk_state = NETLINK_UNCONNECTED;
		nlk->dst_pid = 0;
		nlk->dst_group = 0;
		return 0;
	}
	// Any other destination family must be AF_NETLINK.
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	// A non-zero group mask is only accepted when
	// netlink_capable(sock, NL_NONROOT_SEND) passes.
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
		return -EPERM;

	// No local pid assigned yet: bind one automatically.
	if (!nlk->pid)
		err = netlink_autobind(sock);

	if (err == 0) {
		// A pid was already set or autobind succeeded: record the peer
		// parameters and mark the socket connected.
		sk->sk_state = NETLINK_CONNECTED;
		nlk->dst_pid = nladdr->nl_pid;
		// ffs() turns the group bitmask into a 1-based index of its lowest set bit.
		nlk->dst_group = ffs(nladdr->nl_groups);
	}
	return err;
}

5.4 获取sock名称

// Fill the caller's sockaddr_nl with either this socket's own address
// (peer == 0) or the recorded destination address (peer != 0), and store
// the structure size in *addr_len. Always returns 0.
static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;

	// Address family
	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		// pid and groups of the remote side
		nladdr->nl_pid = nlk->dst_pid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		// pid and groups of this socket; only the first word of the
		// group bitmap is reported (0 when no bitmap is allocated).
		nladdr->nl_pid = nlk->pid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}

5.5 poll
poll是用poll(2)或select(2)系统调用选择套接口数据是否准备好时的处理函数,netlink用的是通用 的数据报的poll处理函数dategram_poll(), 说明略。

5.6 setsockopt
设置netlink sock的各种控制参数:

// Set a SOL_NETLINK socket option: NETLINK_PKTINFO toggles the
// NETLINK_RECV_PKTINFO flag, NETLINK_ADD_MEMBERSHIP / NETLINK_DROP_MEMBERSHIP
// join or leave multicast group number val (1-based).
// Returns 0 on success, -ENOPROTOOPT for a wrong level or unknown option,
// -EFAULT if the user value cannot be read, -EPERM / -EINVAL for the
// membership checks below, or the netlink_alloc_groups() error.
static int netlink_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int val = 0, err;

	// The socket level must be SOL_NETLINK.
	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	// Read the user-supplied value; when optlen is smaller than an int
	// nothing is read and val stays 0.
	if (optlen >= sizeof(int) && get_user(val, (int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		// Non-zero sets NETLINK_RECV_PKTINFO, zero clears it.
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		// Join or leave a multicast group.
		unsigned int subscriptions;
		int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0;

		// Permission check.
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		// Allocate the group bitmap if this socket does not have one yet.
		if (nlk->groups == NULL) {
			err = netlink_alloc_groups(sk);
			if (err)
				return err;
		}
		// Range check: group numbers are 1..ngroups.
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		// Previous membership bit for this group.
		old = test_bit(val - 1, nlk->groups);
		// old=1, new=0 -> subscriptions - 1
		// old=0, new=1 -> subscriptions + 1
		subscriptions = nlk->subscriptions - old + new;
		// Set or clear the membership bit.
		if (new)
			__set_bit(val - 1, nlk->groups);
		else
			__clear_bit(val - 1, nlk->groups);
		// Propagate the new state to the socket bookkeeping.
		netlink_update_subscriptions(sk, subscriptions);
		netlink_update_listeners(sk);
		netlink_table_ungrab();
		err = 0;
		break;
	}
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

// Allocate the multicast group bitmap of a netlink socket.
// Returns 0 on success, -ENOENT if the protocol's nl_table entry is not
// registered, -ENOMEM if the allocation fails.
static int netlink_alloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	int err = 0;

	netlink_lock_table();
	// Number of groups comes from the protocol's nl_table entry.
	// NOTE(review): the original annotation says this is fixed at kernel
	// init, at least 32 and rounded to a multiple of 8 - not visible here, confirm.
	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered)
		err = -ENOENT;
	netlink_unlock_table();

	if (err)
		return err;

	// Zeroed bitmap sized by NLGRPSZ(groups).
	// NOTE(review): NLGRPSZ presumably rounds the size up for alignment -
	// confirm against its definition.
	nlk->groups = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
	if (nlk->groups == NULL)
		return -ENOMEM;
	nlk->ngroups = groups;
	return 0;
}

5.7 getsockopt
获取netlink sock的各种控制参数:

// Read a SOL_NETLINK socket option; only NETLINK_PKTINFO is handled.
// Returns 0 on success, -ENOPROTOOPT for a wrong level or unknown option,
// -EFAULT on a user-space access failure, -EINVAL for a negative or
// too-small length.
static int netlink_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int len, val, err;

	// The socket level must be SOL_NETLINK.
	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	// Fetch the buffer length supplied by user space.
	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case NETLINK_PKTINFO:
		// The only option that can be queried is PKTINFO.
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		// Report 1 if NETLINK_RECV_PKTINFO is set on the socket, else 0.
		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
		// NOTE(review): optval is declared char __user *, so this put_user()
		// may store a single byte rather than sizeof(int) - confirm against
		// put_user() semantics.
		if (put_user(len, optlen) || put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

5.8 发送消息
从用户层发送数据到内核, 内核的sock是接收方

// sendmsg() handler: copy the user message into a new skb, stamp it with
// sender/destination information, then broadcast it (when a destination
// group is set) and unicast it to dst_pid.
// Returns the netlink_unicast() result on the normal path, or a negative
// errno from one of the checks below.
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len)
{
	// Per-socket I/O control block
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	// socket -> sock
	struct sock *sk = sock->sk;
	// sock -> netlink sock
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *addr=msg->msg_name;
	u32 dst_pid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	// scm: Socket level control messages processing
	struct scm_cookie scm;

	// MSG_OOB (out-of-band data) is rejected on netlink sockets.
	if (msg->msg_flags&MSG_OOB)
		return -EOPNOTSUPP;

	// Use the on-stack scm cookie when the iocb does not carry one.
	if (NULL == siocb->scm)
		siocb->scm = &scm;
	// The article does not go into what the scm processing does.
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	// Work out destination pid and group: from the supplied address if
	// there is one, otherwise from the values stored by connect().
	if (msg->msg_namelen) {
		if (addr->nl_family != AF_NETLINK)
			return -EINVAL;
		dst_pid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
			return -EPERM;
	} else {
		dst_pid = nlk->dst_pid;
		dst_group = nlk->dst_group;
	}

	// If the socket has no pid yet, bind one automatically.
	if (!nlk->pid) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	// Message too long for the send buffer.
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	// Allocate a new skb for the message.
	skb = nlmsg_new(len, GFP_KERNEL);
	if (skb==NULL)
		goto out;

	// Fill in the skb's netlink control block.
	NETLINK_CB(skb).pid = nlk->pid;
	NETLINK_CB(skb).dst_pid = dst_pid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
	selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

	/* What can I do? Netlink is asynchronous, so that
	   we will have to save current capabilities to
	   check them, when this message will be delivered
	   to corresponding kernel module. --ANK (980802)
	 */

	err = -EFAULT;
	// Copy the user payload into the skb data area.
	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
		kfree_skb(skb);
		goto out;
	}

	/* @netlink_send:
	 * Save security information for a netlink message so that permission
	 * checking can be performed when the message is processed. The security
	 * information can be saved using the eff_cap field of the
	 * netlink_skb_parms structure. Also may be used to provide fine
	 * grained control over message transmission.
	 * @sk associated sock of task sending the message.
	 * @skb contains the sk_buff structure for the netlink message.
	 * Return 0 if the information was successfully saved and message
	 * is allowed to be transmitted.
	 */
	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	// With a destination group set, broadcast first.
	if (dst_group) {
		// Take an extra reference: netlink_broadcast() calls kfree_skb()
		// on the skb, and it is still needed for the unicast below.
		atomic_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
	}
	// Unicast delivery.
	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
	return err;
}

// Netlink broadcast: offer skb to every socket on the protocol's multicast
// list via do_one_broadcast().
// Returns 0 if at least one socket received it, -ENOBUFS if a failure was
// recorded, -ESRCH otherwise.
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation)
{
	// Shared state handed to do_one_broadcast() for each socket
	struct netlink_broadcast_data info;
	struct hlist_node *node;
	struct sock *sk;

	// Adjust the skb's size.
	skb = netlink_trim(skb, allocation);

	// Fill in the basic fields of info.
	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	info.failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow to change socket list */
	netlink_lock_table();

	// Walk the multicast list and try each bound socket in turn.
	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	// Drop this function's reference to skb.
	kfree_skb(skb);

	netlink_unlock_table();

	// Release a leftover skb2 that was prepared but not delivered.
	if (info.skb2)
		kfree_skb(info.skb2);

	if (info.delivered) {
		// Yield when a receiver was congested and the allocation mode
		// permits waiting.
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	if (info.failure)
		return -ENOBUFS;
	return -ESRCH;
}

// Try to deliver the broadcast described by p to one socket. Always returns 0;
// the outcome is recorded in p (failure, congested, delivered, skb2).
static inline int do_one_broadcast(struct sock *sk, struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	// Never deliver back to the excluded (sending) socket.
	if (p->exclude_sk == sk)
		goto out;

	// Skip sockets whose pid equals p->pid, whose bitmap does not cover
	// the group, or which are not subscribed to it.
	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups))
		goto out;

	// After an earlier failure, only flag the overrun on remaining listeners.
	if (p->failure) {
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			// Shared skb: clone it.
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			// Not shared: take another reference to the same skb.
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		// Still NULL, so the clone must have failed.
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		// skb2 could not be queued on this socket; it is kept in p->skb2.
		netlink_overrun(sk);
	} else {
		// Delivered: record congestion and clear skb2, which is now queued
		// on the receiver.
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
	sock_put(sk);

out:
	return 0;
}

// Queue skb on sk's receive queue if there is room.
// Returns -1 when nothing was queued, otherwise 1 if the receiver is now
// over its receive-buffer limit and 0 if not.
static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	// The receiver must be within its receive-buffer limit (sk_rcvbuf)
	// and bit 0 of nlk->state must be clear.
	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(0, &nlk->state)) {
		skb_set_owner_r(skb, sk);
		// Append straight to the destination's receive queue: the
		// destination socket is already known, so there is no send queue.
		skb_queue_tail(&sk->sk_receive_queue, skb);
		// Invoke the socket's sk_data_ready callback with the skb length.
		sk->sk_data_ready(sk, skb->len);
		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
	}
	return -1;
}

// Netlink unicast: deliver skb from ssk to the socket identified by pid.
// Returns the netlink_sendskb() result on success, PTR_ERR() of the lookup
// on failure, or the netlink_attachskb() error.
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	// Adjust the skb's size.
	skb = netlink_trim(skb, gfp_any());

	// Send timeout of the sending socket.
	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	// ssk is the sending socket; look up the destination socket by pid.
	sk = netlink_getsockbypid(ssk, pid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	// Attach the skb to the destination socket; a return of 1 means the
	// lookup has to be repeated.
	err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	// Hand the skb to the destination.
	return netlink_sendskb(sk, skb, ssk->sk_protocol);
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination, just all
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
*/
// Note: this is a kernel-global function (not static).
int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	// If the receiver is over its receive-buffer limit (or bit 0 of
	// nlk->state is set), either fail immediately or sleep and ask the
	// caller to retry.
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) {
		// Wait-queue entry for the current task
		DECLARE_WAITQUEUE(wait, current);
		if (!timeo) {
			// No time left to wait: flag an overrun when there is no
			// sender or the sender's pid is 0, then drop the socket
			// reference and the skb.
			if (!ssk || nlk_sk(ssk)->pid == 0)
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		// Mark the current task interruptible.
		__set_current_state(TASK_INTERRUPTIBLE);
		// Put the current task on the socket's wait queue.
		add_wait_queue(&nlk->wait, &wait);

		// Re-check the condition; if there is still no room and the socket
		// is not dead, sleep for at most timeo.
		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) && !sock_flag(sk, SOCK_DEAD))
			timeo = schedule_timeout(timeo);

		// Back to the running state.
		__set_current_state(TASK_RUNNING);
		// Leave the wait queue.
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			// A signal is pending for the current task: free the skb and
			// return the errno chosen by sock_intr_errno(timeo).
			kfree_skb(skb);
			return sock_intr_errno(timeo);
		}
		// Return 1 so the caller looks the socket up again.
		return 1;
	}
	// There is room: make this netlink sock the owner of the skb.
	skb_set_owner_r(skb, sk);
	return 0;
}

// Note: this is a kernel-global function (not static).
// Queue skb on sk's receive queue, notify the socket, drop the socket
// reference and return the skb length.
int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
{
	// Length is saved up front and used after the skb has been queued.
	int len = skb->len;

	// Append the skb to the tail of the receive queue.
	skb_queue_tail(&sk->sk_receive_queue, skb);
	// Invoke the socket's sk_data_ready callback.
	sk->sk_data_ready(sk, len);
	sock_put(sk);
	return len;
}

5.9 接收消息
数据是内核传向用户空间的

// recvmsg() handler: dequeue one datagram, copy up to len bytes to the
// caller, and fill in the source address when msg_name is set.
// Returns err when it is non-zero, otherwise the number of bytes copied.
static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
{
	// Per-socket I/O control block
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	// scm
	struct scm_cookie scm;
	// socket -> sock
	struct sock *sk = sock->sk;
	// sock -> netlink sock
	struct netlink_sock *nlk = nlk_sk(sk);
	// Non-blocking request?
	int noblock = flags&MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb;
	int err;

	// MSG_OOB is not accepted.
	if (flags&MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	// Receive one datagram.
	skb = skb_recv_datagram(sk,flags,noblock,&err);
	if (skb==NULL)
		goto out;

	msg->msg_namelen = 0;

	// Actual length of the received data.
	copied = skb->len;
	// Caller's buffer is shorter than the data: flag truncation.
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb->h.raw = skb->data;
	// Copy the skb data into the caller's buffer.
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		// Caller supplied an address buffer: fill in the sender's
		// netlink address.
		struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pad = 0;
		addr->nl_pid = NETLINK_CB(skb).pid;
		addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	// With NETLINK_RECV_PKTINFO set, pass packet info for this skb to
	// netlink_cmsg_recv_pktinfo().
	if (nlk->flags & NETLINK_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);

	// Use a zeroed on-stack scm cookie when the iocb does not carry one.
	if (NULL == siocb->scm) {
		memset(&scm, 0, sizeof(scm));
		siocb->scm = &scm;
	}
	siocb->scm->creds = *NETLINK_CREDS(skb);
	skb_free_datagram(sk, skb);

	// A dump is pending (nlk->cb) and the receive queue is at most half
	// full: continue the dump.
	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
		netlink_dump(sk);

	scm_recv(sock, msg, siocb->scm, flags);

out:
	// Receive-side wake-up.
	netlink_rcv_wake(sk);
	return err ? : copied;
}

6. 结论
netlink处理代码不是很好懂, 毕竟和其他协议不同之处是内核中同时存在服务器和客户端的sock, 因 此接收发送数据要注意数据的流向。不过在实际使用中感觉不是很稳定, 流量大时会发生各种奇异的死机现象。