TCP->IP输出 之 ip_queue_xmit、ip_build_and_send_pkt、ip_send_unicast_reply

概述

ip_queue_xmit是ip层提供给tcp层的发送回调,大多数tcp发送都会使用这个回调。tcp层使用tcp_transmit_skb封装tcp头之后调用该函数;该函数提供路由查找校验、封装ip头和ip选项的功能,封装完成之后调用ip_local_out发送数据包;

ip_build_and_send_pkt函数是服务器端在给客户端回复syn+ack时调用的,该函数在构造ip头之后,调用ip_local_out发送数据包;

ip_send_unicast_reply函数目前只用于发送ACK和RST,该函数根据对端发过来的skb构造ip头,然后调用ip_append_data向发送队列中附加/新增数据,最后调用ip_push_pending_frames发送数据包;

源码分析
  1 /* Route skb if it has no route yet, prepend the IPv4 header (plus options) and send it with ip_local_out(); returns its result, or -EHOSTUNREACH when no route is usable. Note: skb->sk can be different from sk, in case of tunnels */
  2 int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
  3 {
  4     struct inet_sock *inet = inet_sk(sk);
  5     struct net *net = sock_net(sk);
  6     struct ip_options_rcu *inet_opt;
  7     struct flowi4 *fl4;
  8     struct rtable *rt;
  9     struct iphdr *iph;
 10     int res;
 11 
 12     /* Skip all of this if the packet is already routed,
 13      * f.e. by something like SCTP.
 14      */
 15     rcu_read_lock();
 16     inet_opt = rcu_dereference(inet->inet_opt);
 17     fl4 = &fl->u.ip4;
 18 
 19     /* Fetch the route already attached to the skb, if any */
 20     rt = skb_rtable(skb);
 21 
 22     /* The skb already carries a route: skip the lookup below */
 23     if (rt)
 24         goto packet_routed;
 25 
 26     /* Make sure we can route this packet. */
 27     /* Check the route cached on the socket */
 28     rt = (struct rtable *)__sk_dst_check(sk, 0);
 29     /* No usable cached route was returned */
 30     if (!rt) {
 31         __be32 daddr;
 32 
 33         /* Use correct destination address if we have options. */
 34         /* Default destination: the socket's peer address */
 35         daddr = inet->inet_daddr;
 36 
 37         /* Source-route option (srr) set: route towards opt.faddr instead */
 38         if (inet_opt && inet_opt->opt.srr)
 39             daddr = inet_opt->opt.faddr;
 40 
 41         /* If this fails, retransmit mechanism of transport layer will
 42          * keep trying until route appears or the connection times
 43          * itself out.
 44          */
 45         /* Look up an output route for this flow */
 46         rt = ip_route_output_ports(net, fl4, sk,
 47                        daddr, inet->inet_saddr,
 48                        inet->inet_dport,
 49                        inet->inet_sport,
 50                        sk->sk_protocol,
 51                        RT_CONN_FLAGS(sk),
 52                        sk->sk_bound_dev_if);
 53         /* Lookup failed */
 54         if (IS_ERR(rt))
 55             goto no_route;
 56 
 57         /* Hand the new route to the socket via sk_setup_caps() */
 58         sk_setup_caps(sk, &rt->dst);
 59     }
 60 
 61     /* Attach the route to the skb (noref variant) */
 62     skb_dst_set_noref(skb, &rt->dst);
 63 
 64 packet_routed:
 65     /* Strict-route option set and the route uses a gateway: treat as no route */
 66     if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
 67         goto no_route;
 68 
 69     /* OK, we know where to send it, allocate and build IP header. */
 70     /* Push room for the IP header plus optlen bytes of options, if any */
 71     skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
 72     skb_reset_network_header(skb);
 73 
 74     /* Fill in the IP header; the first 16 bits pack version=4, ihl=5 and the TOS byte */
 75     iph = ip_hdr(skb);
 76     *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
 77     if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
 78         iph->frag_off = htons(IP_DF);
 79     else
 80         iph->frag_off = 0;
 81     iph->ttl      = ip_select_ttl(inet, &rt->dst);
 82     iph->protocol = sk->sk_protocol;
 83     ip_copy_addrs(iph, fl4);
 84 
 85     /* Transport layer set skb->h.foo itself. */
 86     /* Build the IP options; ihl grows by optlen / 4 */
 87     if (inet_opt && inet_opt->opt.optlen) {
 88         iph->ihl += inet_opt->opt.optlen >> 2;
 89         ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 90     }
 91 
 92     /* Select the IP id, passing gso_segs (or 1 when it is zero) as the segment count */
 93     ip_select_ident_segs(net, skb, sk,
 94                  skb_shinfo(skb)->gso_segs ?: 1);
 95 
 96     /* TODO : should we use skb->sk here instead of sk ? */
 97     /* Copy priority and mark from the socket to the skb */
 98     skb->priority = sk->sk_priority;
 99     skb->mark = sk->sk_mark;
100 
101     /* Send the packet */
102     res = ip_local_out(net, sk, skb);
103     rcu_read_unlock();
104     return res;
105 
106 no_route:
107     /* No route: bump OUTNOROUTES, free the skb and report -EHOSTUNREACH */
108     rcu_read_unlock();
109     IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
110     kfree_skb(skb);
111     return -EHOSTUNREACH;
112 }
 1 int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 2               __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
 3 {
 4     struct inet_sock *inet = inet_sk(sk);
 5     struct rtable *rt = skb_rtable(skb);
 6     struct net *net = sock_net(sk);
 7     struct iphdr *iph;
 8     /* Prepend an IPv4 header (plus opt, if given) to skb using the route already on the skb, then send it with ip_local_out() and return its result. */
 9     /* Build the IP header. */
10     /* Push room for the header plus optlen bytes of options, then fill the fixed fields */
11     skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
12     skb_reset_network_header(skb);
13     iph = ip_hdr(skb);
14     iph->version  = 4;
15     iph->ihl      = 5;
16     iph->tos      = inet->tos;
17     iph->ttl      = ip_select_ttl(inet, &rt->dst);
18     iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
19     iph->saddr    = saddr;
20     iph->protocol = sk->sk_protocol;
21 
22     /* Fragmentation: with DF set the id is 0, otherwise an id is selected */
23     if (ip_dont_fragment(sk, &rt->dst)) {
24         iph->frag_off = htons(IP_DF);
25         iph->id = 0;
26     } else {
27         iph->frag_off = 0;
28         __ip_select_ident(net, iph, 1);
29     }
30 
31     /* Build the IP options; ihl grows by optlen / 4 */
32     if (opt && opt->opt.optlen) {
33         iph->ihl += opt->opt.optlen>>2;
34         ip_options_build(skb, &opt->opt, daddr, rt, 0);
35     }
36 
37     /* Copy priority and mark from the socket to the skb */
38     skb->priority = sk->sk_priority;
39     skb->mark = sk->sk_mark;
40 
41     /* Send it out. */
42     /* Note: the socket passed down is skb->sk, not the sk argument */
43     return ip_local_out(net, skb->sk, skb);
44 }
 1 void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 2                const struct ip_options *sopt,
 3                __be32 daddr, __be32 saddr,
 4                const struct ip_reply_arg *arg,
 5                unsigned int len)
 6 {
 7     struct ip_options_data replyopts;
 8     struct ipcm_cookie ipc;
 9     struct flowi4 fl4;
10     struct rtable *rt = skb_rtable(skb);
11     struct net *net = sock_net(sk);
12     struct sk_buff *nskb;
13     int err;
14     int oif;
15     /* Send len bytes from arg->iov as a reply to daddr, using a route looked up from fields of the received skb; returns nothing, failures are silent. */
16     /* Build the reply options from the received skb; give up if that fails */
17     if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
18         return;
19 
20     ipc.addr = daddr;
21     ipc.opt = NULL;
22     ipc.tx_flags = 0;
23     ipc.ttl = 0;
24     ipc.tos = -1;
25 
26     /* Options are present (optlen != 0) */
27     if (replyopts.opt.opt.optlen) {
28         ipc.opt = &replyopts.opt;
29 
30         /* Source route present: route towards opt.faddr instead of daddr */
31         if (replyopts.opt.opt.srr)
32             daddr = replyopts.opt.opt.faddr;
33     }
34 
35     /* Output interface: the bound device, else skb_iif when it is an L3 master index */
36     oif = arg->bound_dev_if;
37     if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
38         oif = skb->skb_iif;
39 
40     /* Initialise the flow key and look up the output route */
41     flowi4_init_output(&fl4, oif,
42                IP4_REPLY_MARK(net, skb->mark),
43                RT_TOS(arg->tos),
44                RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
45                ip_reply_arg_flowi_flags(arg),
46                daddr, saddr,
47                tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
48                arg->uid);
49     security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
50     rt = ip_route_output_key(net, &fl4);
51     if (IS_ERR(rt))
52         return;
53 
54 
55     /* Update socket fields from arg, the received skb and the flow */
56     inet_sk(sk)->tos = arg->tos;
57 
58     sk->sk_priority = skb->priority;
59     sk->sk_protocol = ip_hdr(skb)->protocol;
60     sk->sk_bound_dev_if = arg->bound_dev_if;
61     sk->sk_sndbuf = sysctl_wmem_default;
62     sk->sk_mark = fl4.flowi4_mark;
63     /* Queue the payload with ip_append_data(); on error flush the pending frames */
64     err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
65                  len, 0, &ipc, &rt, MSG_DONTWAIT);
66     if (unlikely(err)) {
67         ip_flush_pending_frames(sk);
68         goto out;
69     }
70 
71     /* If the write queue holds an skb, patch in the checksum (when csumoffset >= 0) and send */
72     nskb = skb_peek(&sk->sk_write_queue);
73     if (nskb) {
74         if (arg->csumoffset >= 0)
75             *((__sum16 *)skb_transport_header(nskb) +
76               arg->csumoffset) = csum_fold(csum_add(nskb->csum,
77                                 arg->csum));
78         nskb->ip_summed = CHECKSUM_NONE;
79 
80         /* Push the queued frames out */
81         ip_push_pending_frames(sk, &fl4);
82     }
83 out:
84     ip_rt_put(rt);
85 }

猜你喜欢

转载自www.cnblogs.com/wanpengcoder/p/11755349.html