IP输出 之 ip_output、ip_finish_output、ip_finish_output2

概述

ip_output-设置输出设备和协议,然后经过POST_ROUTING钩子点,最后调用ip_finish_output;

ip_finish_output-对skb进行分片判断:GSO包走GSO输出路径;需要分片的,分片后输出;不需要分片的,则直接输出;

ip_finish_output2-对skb的头部空间进行检查,看是否能够容纳下二层头部,若空间不足,则需要重新申请skb;然后,查找下一跳对应的邻居项(不存在则创建),并通过邻居子系统输出;

源码分析
 /*
  * ip_output - set the outgoing device and protocol on the skb, then hand it
  * to the netfilter POST_ROUTING hook with ip_finish_output as the function
  * passed for continuation.
  *
  * The device is read from the skb's dst entry and is also passed to the hook
  * as the output device. The final NF_HOOK_COND argument is false when
  * IPSKB_REROUTED is set in the skb's IP control block.
  * NOTE(review): presumably a false condition skips the hook and calls
  * ip_finish_output directly - confirm against the NF_HOOK_COND definition.
  *
  * Returns the value produced by NF_HOOK_COND.
  */
 1 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 2 {
 3     struct net_device *dev = skb_dst(skb)->dev;
 4 
 5     IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
 6 
 7     /* Set the output device and protocol */
 8     skb->dev = dev;
 9     skb->protocol = htons(ETH_P_IP);
10 
11     /* Pass through the netfilter POST_ROUTING hook */
12     return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
13                 net, sk, skb, NULL, dev,
14                 ip_finish_output,
15                 !(IPCB(skb)->flags & IPSKB_REROUTED));
16 }
 /*
  * ip_finish_output - choose how the skb is sent: the GSO path, fragmentation,
  * or direct output through ip_finish_output2.
  *
  * Checks are made in this order:
  *   1. BPF_CGROUP_RUN_PROG_INET_EGRESS; on a non-zero result the skb is freed
  *      and that result is returned.
  *   2. With CONFIG_NETFILTER and CONFIG_XFRM both defined, an skb whose dst
  *      has an xfrm pointer set is flagged IPSKB_REROUTED and passed to
  *      dst_output.
  *   3. GSO skbs go to ip_finish_output_gso.
  *   4. skbs longer than the MTU, or flagged IPSKB_FRAG_PMTU, go to
  *      ip_fragment with ip_finish_output2 as the output callback.
  *   5. Everything else goes straight to ip_finish_output2.
  *
  * Returns the result of whichever path was taken.
  */
 1 static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 2 {
 3     unsigned int mtu;
 4     int ret;
 5 
 6     ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 7     if (ret) {
 8         kfree_skb(skb);
 9         return ret;
10     }
11 
12 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
13     /* Policy lookup after SNAT yielded a new policy */
14     if (skb_dst(skb)->xfrm) {
15         IPCB(skb)->flags |= IPSKB_REROUTED;
16         return dst_output(net, sk, skb);
17     }
18 #endif
19     /* Fetch the MTU */
20     mtu = ip_skb_dst_mtu(sk, skb);
21 
22     /* GSO skb: use the GSO output path */
23     if (skb_is_gso(skb))
24         return ip_finish_output_gso(net, sk, skb, mtu);
25 
26     /* Fragment if length > mtu or the IPSKB_FRAG_PMTU flag is set */
27     if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
28         return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
29 
30     /* Output the packet */
31     return ip_finish_output2(net, sk, skb);
32 }
 /*
  * ip_finish_output2 - make sure the skb has enough headroom for the
  * link-layer header, then transmit it through the neighbour entry of the
  * next hop.
  *
  * Returns:
  *   -ENOMEM if the headroom reallocation fails (the skb is freed);
  *   the lwtunnel_xmit() result when it is negative or LWTUNNEL_XMIT_DONE;
  *   the neigh_output() result when a neighbour was found or created;
  *   -EINVAL if the neighbour pointer is an error value (the skb is freed).
  */
 1 static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 2 {
 3     struct dst_entry *dst = skb_dst(skb);
 4     struct rtable *rt = (struct rtable *)dst;
 5     struct net_device *dev = dst->dev;
 6     unsigned int hh_len = LL_RESERVED_SPACE(dev);
 7     struct neighbour *neigh;
 8     u32 nexthop;
 9 
10     if (rt->rt_type == RTN_MULTICAST) {
11         IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
12     } else if (rt->rt_type == RTN_BROADCAST)
13         IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
14 
15     /* Be paranoid, rather than too clever. */
16     /* skb headroom is too small for the link-layer header */
17     if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
18         struct sk_buff *skb2;
19 
20         /* Reallocate the skb with enough headroom */
21         skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
22         if (!skb2) {
23             kfree_skb(skb);
24             return -ENOMEM;
25         }
26         /* Set the original skb's socket as owner of the new skb */
27         if (skb->sk)
28             skb_set_owner_w(skb2, skb->sk);
29 
30         /* Release the old skb */
31         consume_skb(skb);
32 
33         /* Continue with the new skb */
34         skb = skb2;
35     }
36     /* Lightweight-tunnel redirect: stop here on a negative result or LWTUNNEL_XMIT_DONE */
37     if (lwtunnel_xmit_redirect(dst->lwtstate)) {
38         int res = lwtunnel_xmit(skb);
39 
40         if (res < 0 || res == LWTUNNEL_XMIT_DONE)
41             return res;
42     }
43 
44     rcu_read_lock_bh();
45     /* Get the next-hop address */
46     nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
47     /* Look up the neighbour entry for the next hop */
48     neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
49 
50     /* None found: create a neighbour entry in arp_tbl */
51     if (unlikely(!neigh))
52         neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
53 
54     /* A usable neighbour is available */
55     if (!IS_ERR(neigh)) {
56         int res;
57 
58         /* Confirm the neighbour for this skb (sock_confirm_neigh) */
59         sock_confirm_neigh(skb, neigh);
60 
61         /* Transmit through the neighbour subsystem */
62         res = neigh_output(neigh, skb);
63 
64         rcu_read_unlock_bh();
65         return res;
66     }
67     rcu_read_unlock_bh();
68 
69     net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
70                 __func__);
71     /* Free the skb */
72     kfree_skb(skb);
73     return -EINVAL;
74 }

猜你喜欢

转载自www.cnblogs.com/wanpengcoder/p/11755363.html
IP