whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

ip6_output.c (47211B)


      1 /*
      2  *	IPv6 output functions
      3  *	Linux INET6 implementation
      4  *
      5  *	Authors:
      6  *	Pedro Roque		<roque@di.fc.ul.pt>
      7  *
      8  *	Based on linux/net/ipv4/ip_output.c
      9  *
     10  *	This program is free software; you can redistribute it and/or
     11  *      modify it under the terms of the GNU General Public License
     12  *      as published by the Free Software Foundation; either version
     13  *      2 of the License, or (at your option) any later version.
     14  *
     15  *	Changes:
     16  *	A.N.Kuznetsov	:	arithmetics in fragmentation.
     17  *				extension headers are implemented.
     18  *				route changes now work.
     19  *				ip6_forward does not confuse sniffers.
     20  *				etc.
     21  *
     22  *      H. von Brand    :       Added missing #include <linux/string.h>
     23  *	Imran Patel	:	frag id should be in NBO
     24  *      Kazunori MIYAZAWA @USAGI
     25  *			:       add ip6_append_data and related functions
     26  *				for datagram xmit
     27  */
     28 
     29 #include <linux/errno.h>
     30 #include <linux/kernel.h>
     31 #include <linux/string.h>
     32 #include <linux/socket.h>
     33 #include <linux/net.h>
     34 #include <linux/netdevice.h>
     35 #include <linux/if_arp.h>
     36 #include <linux/in6.h>
     37 #include <linux/tcp.h>
     38 #include <linux/route.h>
     39 #include <linux/module.h>
     40 #include <linux/slab.h>
     41 
     42 #include <linux/bpf-cgroup.h>
     43 #include <linux/netfilter.h>
     44 #include <linux/netfilter_ipv6.h>
     45 
     46 #include <net/sock.h>
     47 #include <net/snmp.h>
     48 
     49 #include <net/ipv6.h>
     50 #include <net/ndisc.h>
     51 #include <net/protocol.h>
     52 #include <net/ip6_route.h>
     53 #include <net/addrconf.h>
     54 #include <net/rawv6.h>
     55 #include <net/icmp.h>
     56 #include <net/xfrm.h>
     57 #include <net/checksum.h>
     58 #include <linux/mroute6.h>
     59 #include <net/l3mdev.h>
     60 #include <net/lwtunnel.h>
     61 
     62 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
     63 {
     64 	struct dst_entry *dst = skb_dst(skb);
     65 	struct net_device *dev = dst->dev;
     66 	struct neighbour *neigh;
     67 	struct in6_addr *nexthop;
     68 	int ret;
     69 
     70 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
     71 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
     72 
     73 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
     74 		    ((mroute6_is_socket(net, skb) &&
     75 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
     76 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
     77 					 &ipv6_hdr(skb)->saddr))) {
     78 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
     79 
     80 			/* Do not check for IFF_ALLMULTI; multicast routing
     81 			   is not supported in any case.
     82 			 */
     83 			if (newskb)
     84 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
     85 					net, sk, newskb, NULL, newskb->dev,
     86 					dev_loopback_xmit);
     87 
     88 			if (ipv6_hdr(skb)->hop_limit == 0) {
     89 				IP6_INC_STATS(net, idev,
     90 					      IPSTATS_MIB_OUTDISCARDS);
     91 				kfree_skb(skb);
     92 				return 0;
     93 			}
     94 		}
     95 
     96 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
     97 
     98 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
     99 		    IPV6_ADDR_SCOPE_NODELOCAL &&
    100 		    !(dev->flags & IFF_LOOPBACK)) {
    101 			kfree_skb(skb);
    102 			return 0;
    103 		}
    104 	}
    105 
    106 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
    107 		int res = lwtunnel_xmit(skb);
    108 
    109 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
    110 			return res;
    111 	}
    112 
    113 	rcu_read_lock_bh();
    114 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
    115 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
    116 	if (unlikely(!neigh))
    117 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
    118 	if (!IS_ERR(neigh)) {
    119 		sock_confirm_neigh(skb, neigh);
    120 		ret = neigh_output(neigh, skb);
    121 		rcu_read_unlock_bh();
    122 		return ret;
    123 	}
    124 	rcu_read_unlock_bh();
    125 
    126 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
    127 	kfree_skb(skb);
    128 	return -EINVAL;
    129 }
    130 
    131 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
    132 {
    133 	int ret;
    134 
    135 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
    136 	if (ret) {
    137 		kfree_skb(skb);
    138 		return ret;
    139 	}
    140 
    141 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
    142 	/* Policy lookup after SNAT yielded a new policy */
    143 	if (skb_dst(skb)->xfrm) {
    144 		IPCB(skb)->flags |= IPSKB_REROUTED;
    145 		return dst_output(net, sk, skb);
    146 	}
    147 #endif
    148 
    149 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
    150 	    dst_allfrag(skb_dst(skb)) ||
    151 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
    152 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
    153 	else
    154 		return ip6_finish_output2(net, sk, skb);
    155 }
    156 
    157 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
    158 {
    159 	struct net_device *dev = skb_dst(skb)->dev;
    160 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
    161 
    162 	skb->protocol = htons(ETH_P_IPV6);
    163 	skb->dev = dev;
    164 
    165 	if (unlikely(idev->cnf.disable_ipv6)) {
    166 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
    167 		kfree_skb(skb);
    168 		return 0;
    169 	}
    170 
    171 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
    172 			    net, sk, skb, NULL, dev,
    173 			    ip6_finish_output,
    174 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
    175 }
    176 
    177 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
    178 {
    179 	if (!np->autoflowlabel_set)
    180 		return ip6_default_np_autolabel(net);
    181 	else
    182 		return np->autoflowlabel;
    183 }
    184 
    185 /*
    186  * xmit an sk_buff (used by TCP, SCTP and DCCP)
    187  * Note : socket lock is not held for SYNACK packets, but might be modified
    188  * by calls to skb_set_owner_w() and ipv6_local_error(),
    189  * which are using proper atomic operations or spinlocks.
    190  */
    191 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
    192 	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
    193 {
    194 	struct net *net = sock_net(sk);
    195 	const struct ipv6_pinfo *np = inet6_sk(sk);
    196 	struct in6_addr *first_hop = &fl6->daddr;
    197 	struct dst_entry *dst = skb_dst(skb);
    198 	unsigned int head_room;
    199 	struct ipv6hdr *hdr;
    200 	u8  proto = fl6->flowi6_proto;
    201 	int seg_len = skb->len;
    202 	int hlimit = -1;
    203 	u32 mtu;
    204 
    205 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
    206 	if (opt)
    207 		head_room += opt->opt_nflen + opt->opt_flen;
    208 
    209 	if (unlikely(skb_headroom(skb) < head_room)) {
    210 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
    211 		if (!skb2) {
    212 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
    213 				      IPSTATS_MIB_OUTDISCARDS);
    214 			kfree_skb(skb);
    215 			return -ENOBUFS;
    216 		}
    217 		if (skb->sk)
    218 			skb_set_owner_w(skb2, skb->sk);
    219 		consume_skb(skb);
    220 		skb = skb2;
    221 	}
    222 
    223 	if (opt) {
    224 		seg_len += opt->opt_nflen + opt->opt_flen;
    225 
    226 		if (opt->opt_flen)
    227 			ipv6_push_frag_opts(skb, opt, &proto);
    228 
    229 		if (opt->opt_nflen)
    230 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
    231 					     &fl6->saddr);
    232 	}
    233 
    234 	skb_push(skb, sizeof(struct ipv6hdr));
    235 	skb_reset_network_header(skb);
    236 	hdr = ipv6_hdr(skb);
    237 
    238 	/*
    239 	 *	Fill in the IPv6 header
    240 	 */
    241 	if (np)
    242 		hlimit = np->hop_limit;
    243 	if (hlimit < 0)
    244 		hlimit = ip6_dst_hoplimit(dst);
    245 
    246 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
    247 				ip6_autoflowlabel(net, np), fl6));
    248 
    249 	hdr->payload_len = htons(seg_len);
    250 	hdr->nexthdr = proto;
    251 	hdr->hop_limit = hlimit;
    252 
    253 	hdr->saddr = fl6->saddr;
    254 	hdr->daddr = *first_hop;
    255 
    256 	skb->protocol = htons(ETH_P_IPV6);
    257 	skb->priority = sk->sk_priority;
    258 	skb->mark = mark;
    259 
    260 	mtu = dst_mtu(dst);
    261 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
    262 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
    263 			      IPSTATS_MIB_OUT, skb->len);
    264 
    265 		/* if egress device is enslaved to an L3 master device pass the
    266 		 * skb to its handler for processing
    267 		 */
    268 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
    269 		if (unlikely(!skb))
    270 			return 0;
    271 
    272 		/* hooks should never assume socket lock is held.
    273 		 * we promote our socket to non const
    274 		 */
    275 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
    276 			       net, (struct sock *)sk, skb, NULL, dst->dev,
    277 			       dst_output);
    278 	}
    279 
    280 	skb->dev = dst->dev;
    281 	/* ipv6_local_error() does not require socket lock,
    282 	 * we promote our socket to non const
    283 	 */
    284 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
    285 
    286 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
    287 	kfree_skb(skb);
    288 	return -EMSGSIZE;
    289 }
    290 EXPORT_SYMBOL(ip6_xmit);
    291 
    292 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
    293 {
    294 	struct ip6_ra_chain *ra;
    295 	struct sock *last = NULL;
    296 
    297 	read_lock(&ip6_ra_lock);
    298 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
    299 		struct sock *sk = ra->sk;
    300 		if (sk && ra->sel == sel &&
    301 		    (!sk->sk_bound_dev_if ||
    302 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
    303 			struct ipv6_pinfo *np = inet6_sk(sk);
    304 
    305 			if (np && np->rtalert_isolate &&
    306 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
    307 				continue;
    308 			}
    309 			if (last) {
    310 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
    311 				if (skb2)
    312 					rawv6_rcv(last, skb2);
    313 			}
    314 			last = sk;
    315 		}
    316 	}
    317 
    318 	if (last) {
    319 		rawv6_rcv(last, skb);
    320 		read_unlock(&ip6_ra_lock);
    321 		return 1;
    322 	}
    323 	read_unlock(&ip6_ra_lock);
    324 	return 0;
    325 }
    326 
    327 static int ip6_forward_proxy_check(struct sk_buff *skb)
    328 {
    329 	struct ipv6hdr *hdr = ipv6_hdr(skb);
    330 	u8 nexthdr = hdr->nexthdr;
    331 	__be16 frag_off;
    332 	int offset;
    333 
    334 	if (ipv6_ext_hdr(nexthdr)) {
    335 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
    336 		if (offset < 0)
    337 			return 0;
    338 	} else
    339 		offset = sizeof(struct ipv6hdr);
    340 
    341 	if (nexthdr == IPPROTO_ICMPV6) {
    342 		struct icmp6hdr *icmp6;
    343 
    344 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
    345 					 offset + 1 - skb->data)))
    346 			return 0;
    347 
    348 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
    349 
    350 		switch (icmp6->icmp6_type) {
    351 		case NDISC_ROUTER_SOLICITATION:
    352 		case NDISC_ROUTER_ADVERTISEMENT:
    353 		case NDISC_NEIGHBOUR_SOLICITATION:
    354 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
    355 		case NDISC_REDIRECT:
    356 			/* For reaction involving unicast neighbor discovery
    357 			 * message destined to the proxied address, pass it to
    358 			 * input function.
    359 			 */
    360 			return 1;
    361 		default:
    362 			break;
    363 		}
    364 	}
    365 
    366 	/*
    367 	 * The proxying router can't forward traffic sent to a link-local
    368 	 * address, so signal the sender and discard the packet. This
    369 	 * behavior is clarified by the MIPv6 specification.
    370 	 */
    371 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
    372 		dst_link_failure(skb);
    373 		return -1;
    374 	}
    375 
    376 	return 0;
    377 }
    378 
    379 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
    380 				     struct sk_buff *skb)
    381 {
    382 	struct dst_entry *dst = skb_dst(skb);
    383 
    384 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
    385 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
    386 
    387 #ifdef CONFIG_NET_SWITCHDEV
    388 	if (skb->offload_l3_fwd_mark) {
    389 		consume_skb(skb);
    390 		return 0;
    391 	}
    392 #endif
    393 
    394 	skb->tstamp = 0;
    395 	return dst_output(net, sk, skb);
    396 }
    397 
    398 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
    399 {
    400 	if (skb->len <= mtu)
    401 		return false;
    402 
    403 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
    404 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
    405 		return true;
    406 
    407 	if (skb->ignore_df)
    408 		return false;
    409 
    410 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
    411 		return false;
    412 
    413 	return true;
    414 }
    415 
    416 int ip6_forward(struct sk_buff *skb)
    417 {
    418 	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
    419 	struct dst_entry *dst = skb_dst(skb);
    420 	struct ipv6hdr *hdr = ipv6_hdr(skb);
    421 	struct inet6_skb_parm *opt = IP6CB(skb);
    422 	struct net *net = dev_net(dst->dev);
    423 	u32 mtu;
    424 
    425 	if (net->ipv6.devconf_all->forwarding == 0)
    426 		goto error;
    427 
    428 	if (skb->pkt_type != PACKET_HOST)
    429 		goto drop;
    430 
    431 	if (unlikely(skb->sk))
    432 		goto drop;
    433 
    434 	if (skb_warn_if_lro(skb))
    435 		goto drop;
    436 
    437 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
    438 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
    439 		goto drop;
    440 	}
    441 
    442 	skb_forward_csum(skb);
    443 
    444 	/*
    445 	 *	We DO NOT make any processing on
    446 	 *	RA packets, pushing them to user level AS IS
    447 	 *	without ane WARRANTY that application will be able
    448 	 *	to interpret them. The reason is that we
    449 	 *	cannot make anything clever here.
    450 	 *
    451 	 *	We are not end-node, so that if packet contains
    452 	 *	AH/ESP, we cannot make anything.
    453 	 *	Defragmentation also would be mistake, RA packets
    454 	 *	cannot be fragmented, because there is no warranty
    455 	 *	that different fragments will go along one path. --ANK
    456 	 */
    457 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
    458 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
    459 			return 0;
    460 	}
    461 
    462 	/*
    463 	 *	check and decrement ttl
    464 	 */
    465 	if (hdr->hop_limit <= 1) {
    466 		/* Force OUTPUT device used as source address */
    467 		skb->dev = dst->dev;
    468 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
    469 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
    470 
    471 		kfree_skb(skb);
    472 		return -ETIMEDOUT;
    473 	}
    474 
    475 	/* XXX: idev->cnf.proxy_ndp? */
    476 	if (net->ipv6.devconf_all->proxy_ndp &&
    477 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
    478 		int proxied = ip6_forward_proxy_check(skb);
    479 		if (proxied > 0)
    480 			return ip6_input(skb);
    481 		else if (proxied < 0) {
    482 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
    483 			goto drop;
    484 		}
    485 	}
    486 
    487 	if (!xfrm6_route_forward(skb)) {
    488 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
    489 		goto drop;
    490 	}
    491 	dst = skb_dst(skb);
    492 
    493 	/* IPv6 specs say nothing about it, but it is clear that we cannot
    494 	   send redirects to source routed frames.
    495 	   We don't send redirects to frames decapsulated from IPsec.
    496 	 */
    497 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
    498 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
    499 		struct in6_addr *target = NULL;
    500 		struct inet_peer *peer;
    501 		struct rt6_info *rt;
    502 
    503 		/*
    504 		 *	incoming and outgoing devices are the same
    505 		 *	send a redirect.
    506 		 */
    507 
    508 		rt = (struct rt6_info *) dst;
    509 		if (rt->rt6i_flags & RTF_GATEWAY)
    510 			target = &rt->rt6i_gateway;
    511 		else
    512 			target = &hdr->daddr;
    513 
    514 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
    515 
    516 		/* Limit redirects both by destination (here)
    517 		   and by source (inside ndisc_send_redirect)
    518 		 */
    519 		if (inet_peer_xrlim_allow(peer, 1*HZ))
    520 			ndisc_send_redirect(skb, target);
    521 		if (peer)
    522 			inet_putpeer(peer);
    523 	} else {
    524 		int addrtype = ipv6_addr_type(&hdr->saddr);
    525 
    526 		/* This check is security critical. */
    527 		if (addrtype == IPV6_ADDR_ANY ||
    528 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
    529 			goto error;
    530 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
    531 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
    532 				    ICMPV6_NOT_NEIGHBOUR, 0);
    533 			goto error;
    534 		}
    535 	}
    536 
    537 	mtu = ip6_dst_mtu_forward(dst);
    538 	if (mtu < IPV6_MIN_MTU)
    539 		mtu = IPV6_MIN_MTU;
    540 
    541 	if (ip6_pkt_too_big(skb, mtu)) {
    542 		/* Again, force OUTPUT device used as source address */
    543 		skb->dev = dst->dev;
    544 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
    545 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
    546 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
    547 				IPSTATS_MIB_FRAGFAILS);
    548 		kfree_skb(skb);
    549 		return -EMSGSIZE;
    550 	}
    551 
    552 	if (skb_cow(skb, dst->dev->hard_header_len)) {
    553 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
    554 				IPSTATS_MIB_OUTDISCARDS);
    555 		goto drop;
    556 	}
    557 
    558 	hdr = ipv6_hdr(skb);
    559 
    560 	/* Mangling hops number delayed to point after skb COW */
    561 
    562 	hdr->hop_limit--;
    563 
    564 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
    565 		       net, NULL, skb, skb->dev, dst->dev,
    566 		       ip6_forward_finish);
    567 
    568 error:
    569 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
    570 drop:
    571 	kfree_skb(skb);
    572 	return -EINVAL;
    573 }
    574 
    575 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
    576 {
    577 	to->pkt_type = from->pkt_type;
    578 	to->priority = from->priority;
    579 	to->protocol = from->protocol;
    580 	skb_dst_drop(to);
    581 	skb_dst_set(to, dst_clone(skb_dst(from)));
    582 	to->dev = from->dev;
    583 	to->mark = from->mark;
    584 
    585 	skb_copy_hash(to, from);
    586 
    587 #ifdef CONFIG_NET_SCHED
    588 	to->tc_index = from->tc_index;
    589 #endif
    590 	nf_copy(to, from);
    591 	skb_ext_copy(to, from);
    592 	skb_copy_secmark(to, from);
    593 }
    594 
/*
 * Split an IPv6 packet into fragments and send each one through @output.
 *
 * @net:    network namespace used for fragment ids and statistics
 * @sk:     socket passed through unchanged to @output
 * @skb:    packet to fragment; it is freed or handed to @output on every
 *          path through this function
 * @output: transmit callback invoked once per fragment
 *
 * The unfragmentable part of the packet (its length is reported by
 * ip6_find_1stfragopt()) is replicated in front of every fragment,
 * followed by a fragment header.  Two strategies are used:
 *
 *  - fast path: when the skb carries a frag_list whose members already
 *    have a suitable size, alignment and headroom, headers are pushed in
 *    place and the list members are sent as they are;
 *  - slow path: otherwise a new skb is allocated for every fragment and
 *    the data is copied into it.
 *
 * Returns 0 on success or a negative errno: the error of
 * ip6_find_1stfragopt(), skb_checksum_help() or @output, -ENOMEM on
 * allocation failure, or -EMSGSIZE when fragmenting is not allowed or the
 * usable MTU is too small (an ICMPv6 Packet Too Big is sent in that case).
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Per-socket settings are only consulted outside device recursion. */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	/* hlen: length of the part that is repeated in every fragment;
	 * prevhdr: the "next header" byte that will announce the fragment
	 * header.
	 */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember prevhdr as an offset so it can be recomputed later. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* A non-zero per-socket fragment size may only lower the MTU. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Each fragment needs room for the headers plus 8 bytes of data. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the payload space available per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Resolve a pending checksum before the payload is split up. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Recompute prevhdr from the saved offset. */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* The head skb must fit, carry a multiple of 8 payload
		 * bytes, be unshared and have room for a fragment header.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* Move ownership accounting from the head skb to
			 * the individual fragment.
			 */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		/* Keep a private copy of the per-fragment header part. */
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		/* Detach the fragment list from the head skb. */
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Open a gap for the fragment header between the repeated
		 * headers and the payload, then restore the headers in
		 * front of it.
		 */
		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		/* The head skb now only covers its own data. */
		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				/* Advance by the payload of the fragment
				 * that is about to be sent.
				 */
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb_mark_not_on_list(skb);
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Free the fragments that were not handed to output(). */
		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership changes made to the fragments that
		 * were visited before the unsuitable one.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/* Patch the copied "next header" byte at the same offset
		 * that prevhdr has in the original packet.
		 */
		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		/* Set "more fragments" on all but the last fragment. */
		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	/* All data has been copied out; the original is no longer needed. */
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
    885 
    886 static inline int ip6_rt_check(const struct rt6key *rt_key,
    887 			       const struct in6_addr *fl_addr,
    888 			       const struct in6_addr *addr_cache)
    889 {
    890 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
    891 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
    892 }
    893 
    894 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
    895 					  struct dst_entry *dst,
    896 					  const struct flowi6 *fl6)
    897 {
    898 	struct ipv6_pinfo *np = inet6_sk(sk);
    899 	struct rt6_info *rt;
    900 
    901 	if (!dst)
    902 		goto out;
    903 
    904 	if (dst->ops->family != AF_INET6) {
    905 		dst_release(dst);
    906 		return NULL;
    907 	}
    908 
    909 	rt = (struct rt6_info *)dst;
    910 	/* Yes, checking route validity in not connected
    911 	 * case is not very simple. Take into account,
    912 	 * that we do not support routing by source, TOS,
    913 	 * and MSG_DONTROUTE		--ANK (980726)
    914 	 *
    915 	 * 1. ip6_rt_check(): If route was host route,
    916 	 *    check that cached destination is current.
    917 	 *    If it is network route, we still may
    918 	 *    check its validity using saved pointer
    919 	 *    to the last used address: daddr_cache.
    920 	 *    We do not want to save whole address now,
    921 	 *    (because main consumer of this service
    922 	 *    is tcp, which has not this problem),
    923 	 *    so that the last trick works only on connected
    924 	 *    sockets.
    925 	 * 2. oif also should be the same.
    926 	 */
    927 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
    928 #ifdef CONFIG_IPV6_SUBTREES
    929 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
    930 #endif
    931 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
    932 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
    933 		dst_release(dst);
    934 		dst = NULL;
    935 	}
    936 
    937 out:
    938 	return dst;
    939 }
    940 
    941 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
    942 			       struct dst_entry **dst, struct flowi6 *fl6)
    943 {
    944 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
    945 	struct neighbour *n;
    946 	struct rt6_info *rt;
    947 #endif
    948 	int err;
    949 	int flags = 0;
    950 
    951 	/* The correct way to handle this would be to do
    952 	 * ip6_route_get_saddr, and then ip6_route_output; however,
    953 	 * the route-specific preferred source forces the
    954 	 * ip6_route_output call _before_ ip6_route_get_saddr.
    955 	 *
    956 	 * In source specific routing (no src=any default route),
    957 	 * ip6_route_output will fail given src=any saddr, though, so
    958 	 * that's why we try it again later.
    959 	 */
    960 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
    961 		struct fib6_info *from;
    962 		struct rt6_info *rt;
    963 		bool had_dst = *dst != NULL;
    964 
    965 		if (!had_dst)
    966 			*dst = ip6_route_output(net, sk, fl6);
    967 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
    968 
    969 		rcu_read_lock();
    970 		from = rt ? rcu_dereference(rt->from) : NULL;
    971 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
    972 					  sk ? inet6_sk(sk)->srcprefs : 0,
    973 					  &fl6->saddr);
    974 		rcu_read_unlock();
    975 
    976 		if (err)
    977 			goto out_err_release;
    978 
    979 		/* If we had an erroneous initial result, pretend it
    980 		 * never existed and let the SA-enabled version take
    981 		 * over.
    982 		 */
    983 		if (!had_dst && (*dst)->error) {
    984 			dst_release(*dst);
    985 			*dst = NULL;
    986 		}
    987 
    988 		if (fl6->flowi6_oif)
    989 			flags |= RT6_LOOKUP_F_IFACE;
    990 	}
    991 
    992 	if (!*dst)
    993 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
    994 
    995 	err = (*dst)->error;
    996 	if (err)
    997 		goto out_err_release;
    998 
    999 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
   1000 	/*
   1001 	 * Here if the dst entry we've looked up
   1002 	 * has a neighbour entry that is in the INCOMPLETE
   1003 	 * state and the src address from the flow is
   1004 	 * marked as OPTIMISTIC, we release the found
   1005 	 * dst entry and replace it instead with the
   1006 	 * dst entry of the nexthop router
   1007 	 */
   1008 	rt = (struct rt6_info *) *dst;
   1009 	rcu_read_lock_bh();
   1010 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
   1011 				      rt6_nexthop(rt, &fl6->daddr));
   1012 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
   1013 	rcu_read_unlock_bh();
   1014 
   1015 	if (err) {
   1016 		struct inet6_ifaddr *ifp;
   1017 		struct flowi6 fl_gw6;
   1018 		int redirect;
   1019 
   1020 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
   1021 				      (*dst)->dev, 1);
   1022 
   1023 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
   1024 		if (ifp)
   1025 			in6_ifa_put(ifp);
   1026 
   1027 		if (redirect) {
   1028 			/*
   1029 			 * We need to get the dst entry for the
   1030 			 * default router instead
   1031 			 */
   1032 			dst_release(*dst);
   1033 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
   1034 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
   1035 			*dst = ip6_route_output(net, sk, &fl_gw6);
   1036 			err = (*dst)->error;
   1037 			if (err)
   1038 				goto out_err_release;
   1039 		}
   1040 	}
   1041 #endif
   1042 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
   1043 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
   1044 		err = -EAFNOSUPPORT;
   1045 		goto out_err_release;
   1046 	}
   1047 
   1048 	return 0;
   1049 
   1050 out_err_release:
   1051 	dst_release(*dst);
   1052 	*dst = NULL;
   1053 
   1054 	if (err == -ENETUNREACH)
   1055 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
   1056 	return err;
   1057 }
   1058 
   1059 /**
   1060  *	ip6_dst_lookup - perform route lookup on flow
   1061  *	@sk: socket which provides route info
   1062  *	@dst: pointer to dst_entry * for result
   1063  *	@fl6: flow to lookup
   1064  *
   1065  *	This function performs a route lookup on the given flow.
   1066  *
   1067  *	It returns zero on success, or a standard errno code on error.
   1068  */
   1069 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
   1070 		   struct flowi6 *fl6)
   1071 {
   1072 	*dst = NULL;
   1073 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
   1074 }
   1075 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
   1076 
   1077 /**
   1078  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
   1079  *	@sk: socket which provides route info
   1080  *	@fl6: flow to lookup
   1081  *	@final_dst: final destination address for ipsec lookup
   1082  *
   1083  *	This function performs a route lookup on the given flow.
   1084  *
   1085  *	It returns a valid dst pointer on success, or a pointer encoded
   1086  *	error code.
   1087  */
   1088 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
   1089 				      const struct in6_addr *final_dst)
   1090 {
   1091 	struct dst_entry *dst = NULL;
   1092 	int err;
   1093 
   1094 	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
   1095 	if (err)
   1096 		return ERR_PTR(err);
   1097 	if (final_dst)
   1098 		fl6->daddr = *final_dst;
   1099 
   1100 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
   1101 }
   1102 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
   1103 
   1104 /**
   1105  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
   1106  *	@sk: socket which provides the dst cache and route info
   1107  *	@fl6: flow to lookup
   1108  *	@final_dst: final destination address for ipsec lookup
   1109  *	@connected: whether @sk is connected or not
   1110  *
   1111  *	This function performs a route lookup on the given flow with the
   1112  *	possibility of using the cached route in the socket if it is valid.
   1113  *	It will take the socket dst lock when operating on the dst cache.
   1114  *	As a result, this function can only be used in process context.
   1115  *
   1116  *	In addition, for a connected socket, cache the dst in the socket
   1117  *	if the current cache is not valid.
   1118  *
   1119  *	It returns a valid dst pointer on success, or a pointer encoded
   1120  *	error code.
   1121  */
   1122 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
   1123 					 const struct in6_addr *final_dst,
   1124 					 bool connected)
   1125 {
   1126 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
   1127 
   1128 	dst = ip6_sk_dst_check(sk, dst, fl6);
   1129 	if (dst)
   1130 		return dst;
   1131 
   1132 	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
   1133 	if (connected && !IS_ERR(dst))
   1134 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
   1135 
   1136 	return dst;
   1137 }
   1138 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
   1139 
   1140 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
   1141 					       gfp_t gfp)
   1142 {
   1143 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
   1144 }
   1145 
   1146 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
   1147 						gfp_t gfp)
   1148 {
   1149 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
   1150 }
   1151 
   1152 static void ip6_append_data_mtu(unsigned int *mtu,
   1153 				int *maxfraglen,
   1154 				unsigned int fragheaderlen,
   1155 				struct sk_buff *skb,
   1156 				struct rt6_info *rt,
   1157 				unsigned int orig_mtu)
   1158 {
   1159 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
   1160 		if (!skb) {
   1161 			/* first fragment, reserve header_len */
   1162 			*mtu = orig_mtu - rt->dst.header_len;
   1163 
   1164 		} else {
   1165 			/*
   1166 			 * this fragment is not first, the headers
   1167 			 * space is regarded as data space.
   1168 			 */
   1169 			*mtu = orig_mtu;
   1170 		}
   1171 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
   1172 			      + fragheaderlen - sizeof(struct frag_hdr);
   1173 	}
   1174 }
   1175 
   1176 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
   1177 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
   1178 			  struct rt6_info *rt, struct flowi6 *fl6)
   1179 {
   1180 	struct ipv6_pinfo *np = inet6_sk(sk);
   1181 	unsigned int mtu;
   1182 	struct ipv6_txoptions *opt = ipc6->opt;
   1183 
   1184 	/*
   1185 	 * setup for corking
   1186 	 */
   1187 	if (opt) {
   1188 		if (WARN_ON(v6_cork->opt))
   1189 			return -EINVAL;
   1190 
   1191 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
   1192 		if (unlikely(!v6_cork->opt))
   1193 			return -ENOBUFS;
   1194 
   1195 		v6_cork->opt->tot_len = sizeof(*opt);
   1196 		v6_cork->opt->opt_flen = opt->opt_flen;
   1197 		v6_cork->opt->opt_nflen = opt->opt_nflen;
   1198 
   1199 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
   1200 						    sk->sk_allocation);
   1201 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
   1202 			return -ENOBUFS;
   1203 
   1204 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
   1205 						    sk->sk_allocation);
   1206 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
   1207 			return -ENOBUFS;
   1208 
   1209 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
   1210 						   sk->sk_allocation);
   1211 		if (opt->hopopt && !v6_cork->opt->hopopt)
   1212 			return -ENOBUFS;
   1213 
   1214 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
   1215 						    sk->sk_allocation);
   1216 		if (opt->srcrt && !v6_cork->opt->srcrt)
   1217 			return -ENOBUFS;
   1218 
   1219 		/* need source address above miyazawa*/
   1220 	}
   1221 	dst_hold(&rt->dst);
   1222 	cork->base.dst = &rt->dst;
   1223 	cork->fl.u.ip6 = *fl6;
   1224 	v6_cork->hop_limit = ipc6->hlimit;
   1225 	v6_cork->tclass = ipc6->tclass;
   1226 	if (rt->dst.flags & DST_XFRM_TUNNEL)
   1227 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
   1228 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
   1229 	else
   1230 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
   1231 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
   1232 	if (np->frag_size < mtu) {
   1233 		if (np->frag_size)
   1234 			mtu = np->frag_size;
   1235 	}
   1236 	if (mtu < IPV6_MIN_MTU)
   1237 		return -EINVAL;
   1238 	cork->base.fragsize = mtu;
   1239 	cork->base.gso_size = ipc6->gso_size;
   1240 	cork->base.tx_flags = 0;
   1241 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
   1242 
   1243 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
   1244 		cork->base.flags |= IPCORK_ALLFRAG;
   1245 	cork->base.length = 0;
   1246 
   1247 	cork->base.transmit_time = ipc6->sockc.transmit_time;
   1248 
   1249 	return 0;
   1250 }
   1251 
   1252 static int __ip6_append_data(struct sock *sk,
   1253 			     struct flowi6 *fl6,
   1254 			     struct sk_buff_head *queue,
   1255 			     struct inet_cork *cork,
   1256 			     struct inet6_cork *v6_cork,
   1257 			     struct page_frag *pfrag,
   1258 			     int getfrag(void *from, char *to, int offset,
   1259 					 int len, int odd, struct sk_buff *skb),
   1260 			     void *from, int length, int transhdrlen,
   1261 			     unsigned int flags, struct ipcm6_cookie *ipc6)
   1262 {
   1263 	struct sk_buff *skb, *skb_prev = NULL;
   1264 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
   1265 	struct ubuf_info *uarg = NULL;
   1266 	int exthdrlen = 0;
   1267 	int dst_exthdrlen = 0;
   1268 	int hh_len;
   1269 	int copy;
   1270 	int err;
   1271 	int offset = 0;
   1272 	u32 tskey = 0;
   1273 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
   1274 	struct ipv6_txoptions *opt = v6_cork->opt;
   1275 	int csummode = CHECKSUM_NONE;
   1276 	unsigned int maxnonfragsize, headersize;
   1277 	unsigned int wmem_alloc_delta = 0;
   1278 	bool paged, extra_uref;
   1279 
   1280 	skb = skb_peek_tail(queue);
   1281 	if (!skb) {
   1282 		exthdrlen = opt ? opt->opt_flen : 0;
   1283 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
   1284 	}
   1285 
   1286 	paged = !!cork->gso_size;
   1287 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
   1288 	orig_mtu = mtu;
   1289 
   1290 	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
   1291 	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
   1292 		tskey = sk->sk_tskey++;
   1293 
   1294 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
   1295 
   1296 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
   1297 			(opt ? opt->opt_nflen : 0);
   1298 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
   1299 		     sizeof(struct frag_hdr);
   1300 
   1301 	headersize = sizeof(struct ipv6hdr) +
   1302 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
   1303 		     (dst_allfrag(&rt->dst) ?
   1304 		      sizeof(struct frag_hdr) : 0) +
   1305 		     rt->rt6i_nfheader_len;
   1306 
   1307 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
   1308 	 * the first fragment
   1309 	 */
   1310 	if (headersize + transhdrlen > mtu)
   1311 		goto emsgsize;
   1312 
   1313 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
   1314 	    (sk->sk_protocol == IPPROTO_UDP ||
   1315 	     sk->sk_protocol == IPPROTO_RAW)) {
   1316 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
   1317 				sizeof(struct ipv6hdr));
   1318 		goto emsgsize;
   1319 	}
   1320 
   1321 	if (ip6_sk_ignore_df(sk))
   1322 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
   1323 	else
   1324 		maxnonfragsize = mtu;
   1325 
   1326 	if (cork->length + length > maxnonfragsize - headersize) {
   1327 emsgsize:
   1328 		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
   1329 		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
   1330 		return -EMSGSIZE;
   1331 	}
   1332 
   1333 	/* CHECKSUM_PARTIAL only with no extension headers and when
   1334 	 * we are not going to fragment
   1335 	 */
   1336 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
   1337 	    headersize == sizeof(struct ipv6hdr) &&
   1338 	    length <= mtu - headersize &&
   1339 	    (!(flags & MSG_MORE) || cork->gso_size) &&
   1340 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
   1341 		csummode = CHECKSUM_PARTIAL;
   1342 
   1343 	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
   1344 		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
   1345 		if (!uarg)
   1346 			return -ENOBUFS;
   1347 		extra_uref = true;
   1348 		if (rt->dst.dev->features & NETIF_F_SG &&
   1349 		    csummode == CHECKSUM_PARTIAL) {
   1350 			paged = true;
   1351 		} else {
   1352 			uarg->zerocopy = 0;
   1353 			skb_zcopy_set(skb, uarg, &extra_uref);
   1354 		}
   1355 	}
   1356 
   1357 	/*
   1358 	 * Let's try using as much space as possible.
   1359 	 * Use MTU if total length of the message fits into the MTU.
   1360 	 * Otherwise, we need to reserve fragment header and
   1361 	 * fragment alignment (= 8-15 octects, in total).
   1362 	 *
   1363 	 * Note that we may need to "move" the data from the tail of
   1364 	 * of the buffer to the new fragment when we split
   1365 	 * the message.
   1366 	 *
   1367 	 * FIXME: It may be fragmented into multiple chunks
   1368 	 *        at once if non-fragmentable extension headers
   1369 	 *        are too large.
   1370 	 * --yoshfuji
   1371 	 */
   1372 
   1373 	cork->length += length;
   1374 	if (!skb)
   1375 		goto alloc_new_skb;
   1376 
   1377 	while (length > 0) {
   1378 		/* Check if the remaining data fits into current packet. */
   1379 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
   1380 		if (copy < length)
   1381 			copy = maxfraglen - skb->len;
   1382 
   1383 		if (copy <= 0) {
   1384 			char *data;
   1385 			unsigned int datalen;
   1386 			unsigned int fraglen;
   1387 			unsigned int fraggap;
   1388 			unsigned int alloclen;
   1389 			unsigned int pagedlen;
   1390 alloc_new_skb:
   1391 			/* There's no room in the current skb */
   1392 			if (skb)
   1393 				fraggap = skb->len - maxfraglen;
   1394 			else
   1395 				fraggap = 0;
   1396 			/* update mtu and maxfraglen if necessary */
   1397 			if (!skb || !skb_prev)
   1398 				ip6_append_data_mtu(&mtu, &maxfraglen,
   1399 						    fragheaderlen, skb, rt,
   1400 						    orig_mtu);
   1401 
   1402 			skb_prev = skb;
   1403 
   1404 			/*
   1405 			 * If remaining data exceeds the mtu,
   1406 			 * we know we need more fragment(s).
   1407 			 */
   1408 			datalen = length + fraggap;
   1409 
   1410 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
   1411 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
   1412 			fraglen = datalen + fragheaderlen;
   1413 			pagedlen = 0;
   1414 
   1415 			if ((flags & MSG_MORE) &&
   1416 			    !(rt->dst.dev->features&NETIF_F_SG))
   1417 				alloclen = mtu;
   1418 			else if (!paged)
   1419 				alloclen = fraglen;
   1420 			else {
   1421 				alloclen = min_t(int, fraglen, MAX_HEADER);
   1422 				pagedlen = fraglen - alloclen;
   1423 			}
   1424 
   1425 			alloclen += dst_exthdrlen;
   1426 
   1427 			if (datalen != length + fraggap) {
   1428 				/*
   1429 				 * this is not the last fragment, the trailer
   1430 				 * space is regarded as data space.
   1431 				 */
   1432 				datalen += rt->dst.trailer_len;
   1433 			}
   1434 
   1435 			alloclen += rt->dst.trailer_len;
   1436 			fraglen = datalen + fragheaderlen;
   1437 
   1438 			/*
   1439 			 * We just reserve space for fragment header.
   1440 			 * Note: this may be overallocation if the message
   1441 			 * (without MSG_MORE) fits into the MTU.
   1442 			 */
   1443 			alloclen += sizeof(struct frag_hdr);
   1444 
   1445 			copy = datalen - transhdrlen - fraggap - pagedlen;
   1446 			if (copy < 0) {
   1447 				err = -EINVAL;
   1448 				goto error;
   1449 			}
   1450 			if (transhdrlen) {
   1451 				skb = sock_alloc_send_skb(sk,
   1452 						alloclen + hh_len,
   1453 						(flags & MSG_DONTWAIT), &err);
   1454 			} else {
   1455 				skb = NULL;
   1456 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
   1457 				    2 * sk->sk_sndbuf)
   1458 					skb = alloc_skb(alloclen + hh_len,
   1459 							sk->sk_allocation);
   1460 				if (unlikely(!skb))
   1461 					err = -ENOBUFS;
   1462 			}
   1463 			if (!skb)
   1464 				goto error;
   1465 			/*
   1466 			 *	Fill in the control structures
   1467 			 */
   1468 			skb->protocol = htons(ETH_P_IPV6);
   1469 			skb->ip_summed = csummode;
   1470 			skb->csum = 0;
   1471 			/* reserve for fragmentation and ipsec header */
   1472 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
   1473 				    dst_exthdrlen);
   1474 
   1475 			/*
   1476 			 *	Find where to start putting bytes
   1477 			 */
   1478 			data = skb_put(skb, fraglen - pagedlen);
   1479 			skb_set_network_header(skb, exthdrlen);
   1480 			data += fragheaderlen;
   1481 			skb->transport_header = (skb->network_header +
   1482 						 fragheaderlen);
   1483 			if (fraggap) {
   1484 				skb->csum = skb_copy_and_csum_bits(
   1485 					skb_prev, maxfraglen,
   1486 					data + transhdrlen, fraggap, 0);
   1487 				skb_prev->csum = csum_sub(skb_prev->csum,
   1488 							  skb->csum);
   1489 				data += fraggap;
   1490 				pskb_trim_unique(skb_prev, maxfraglen);
   1491 			}
   1492 			if (copy > 0 &&
   1493 			    getfrag(from, data + transhdrlen, offset,
   1494 				    copy, fraggap, skb) < 0) {
   1495 				err = -EFAULT;
   1496 				kfree_skb(skb);
   1497 				goto error;
   1498 			}
   1499 
   1500 			offset += copy;
   1501 			length -= copy + transhdrlen;
   1502 			transhdrlen = 0;
   1503 			exthdrlen = 0;
   1504 			dst_exthdrlen = 0;
   1505 
   1506 			/* Only the initial fragment is time stamped */
   1507 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
   1508 			cork->tx_flags = 0;
   1509 			skb_shinfo(skb)->tskey = tskey;
   1510 			tskey = 0;
   1511 			skb_zcopy_set(skb, uarg, &extra_uref);
   1512 
   1513 			if ((flags & MSG_CONFIRM) && !skb_prev)
   1514 				skb_set_dst_pending_confirm(skb, 1);
   1515 
   1516 			/*
   1517 			 * Put the packet on the pending queue
   1518 			 */
   1519 			if (!skb->destructor) {
   1520 				skb->destructor = sock_wfree;
   1521 				skb->sk = sk;
   1522 				wmem_alloc_delta += skb->truesize;
   1523 			}
   1524 			__skb_queue_tail(queue, skb);
   1525 			continue;
   1526 		}
   1527 
   1528 		if (copy > length)
   1529 			copy = length;
   1530 
   1531 		if (!(rt->dst.dev->features&NETIF_F_SG) &&
   1532 		    skb_tailroom(skb) >= copy) {
   1533 			unsigned int off;
   1534 
   1535 			off = skb->len;
   1536 			if (getfrag(from, skb_put(skb, copy),
   1537 						offset, copy, off, skb) < 0) {
   1538 				__skb_trim(skb, off);
   1539 				err = -EFAULT;
   1540 				goto error;
   1541 			}
   1542 		} else if (!uarg || !uarg->zerocopy) {
   1543 			int i = skb_shinfo(skb)->nr_frags;
   1544 
   1545 			err = -ENOMEM;
   1546 			if (!sk_page_frag_refill(sk, pfrag))
   1547 				goto error;
   1548 
   1549 			if (!skb_can_coalesce(skb, i, pfrag->page,
   1550 					      pfrag->offset)) {
   1551 				err = -EMSGSIZE;
   1552 				if (i == MAX_SKB_FRAGS)
   1553 					goto error;
   1554 
   1555 				__skb_fill_page_desc(skb, i, pfrag->page,
   1556 						     pfrag->offset, 0);
   1557 				skb_shinfo(skb)->nr_frags = ++i;
   1558 				get_page(pfrag->page);
   1559 			}
   1560 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
   1561 			if (getfrag(from,
   1562 				    page_address(pfrag->page) + pfrag->offset,
   1563 				    offset, copy, skb->len, skb) < 0)
   1564 				goto error_efault;
   1565 
   1566 			pfrag->offset += copy;
   1567 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
   1568 			skb->len += copy;
   1569 			skb->data_len += copy;
   1570 			skb->truesize += copy;
   1571 			wmem_alloc_delta += copy;
   1572 		} else {
   1573 			err = skb_zerocopy_iter_dgram(skb, from, copy);
   1574 			if (err < 0)
   1575 				goto error;
   1576 		}
   1577 		offset += copy;
   1578 		length -= copy;
   1579 	}
   1580 
   1581 	if (wmem_alloc_delta)
   1582 		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
   1583 	return 0;
   1584 
   1585 error_efault:
   1586 	err = -EFAULT;
   1587 error:
   1588 	if (uarg)
   1589 		sock_zerocopy_put_abort(uarg, extra_uref);
   1590 	cork->length -= length;
   1591 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
   1592 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
   1593 	return err;
   1594 }
   1595 
   1596 int ip6_append_data(struct sock *sk,
   1597 		    int getfrag(void *from, char *to, int offset, int len,
   1598 				int odd, struct sk_buff *skb),
   1599 		    void *from, int length, int transhdrlen,
   1600 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
   1601 		    struct rt6_info *rt, unsigned int flags)
   1602 {
   1603 	struct inet_sock *inet = inet_sk(sk);
   1604 	struct ipv6_pinfo *np = inet6_sk(sk);
   1605 	int exthdrlen;
   1606 	int err;
   1607 
   1608 	if (flags&MSG_PROBE)
   1609 		return 0;
   1610 	if (skb_queue_empty(&sk->sk_write_queue)) {
   1611 		/*
   1612 		 * setup for corking
   1613 		 */
   1614 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
   1615 				     ipc6, rt, fl6);
   1616 		if (err)
   1617 			return err;
   1618 
   1619 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
   1620 		length += exthdrlen;
   1621 		transhdrlen += exthdrlen;
   1622 	} else {
   1623 		fl6 = &inet->cork.fl.u.ip6;
   1624 		transhdrlen = 0;
   1625 	}
   1626 
   1627 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
   1628 				 &np->cork, sk_page_frag(sk), getfrag,
   1629 				 from, length, transhdrlen, flags, ipc6);
   1630 }
   1631 EXPORT_SYMBOL_GPL(ip6_append_data);
   1632 
   1633 static void ip6_cork_release(struct inet_cork_full *cork,
   1634 			     struct inet6_cork *v6_cork)
   1635 {
   1636 	if (v6_cork->opt) {
   1637 		kfree(v6_cork->opt->dst0opt);
   1638 		kfree(v6_cork->opt->dst1opt);
   1639 		kfree(v6_cork->opt->hopopt);
   1640 		kfree(v6_cork->opt->srcrt);
   1641 		kfree(v6_cork->opt);
   1642 		v6_cork->opt = NULL;
   1643 	}
   1644 
   1645 	if (cork->base.dst) {
   1646 		dst_release(cork->base.dst);
   1647 		cork->base.dst = NULL;
   1648 		cork->base.flags &= ~IPCORK_ALLFRAG;
   1649 	}
   1650 	memset(&cork->fl, 0, sizeof(cork->fl));
   1651 }
   1652 
   1653 struct sk_buff *__ip6_make_skb(struct sock *sk,
   1654 			       struct sk_buff_head *queue,
   1655 			       struct inet_cork_full *cork,
   1656 			       struct inet6_cork *v6_cork)
   1657 {
   1658 	struct sk_buff *skb, *tmp_skb;
   1659 	struct sk_buff **tail_skb;
   1660 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
   1661 	struct ipv6_pinfo *np = inet6_sk(sk);
   1662 	struct net *net = sock_net(sk);
   1663 	struct ipv6hdr *hdr;
   1664 	struct ipv6_txoptions *opt = v6_cork->opt;
   1665 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
   1666 	struct flowi6 *fl6 = &cork->fl.u.ip6;
   1667 	unsigned char proto = fl6->flowi6_proto;
   1668 
   1669 	skb = __skb_dequeue(queue);
   1670 	if (!skb)
   1671 		goto out;
   1672 	tail_skb = &(skb_shinfo(skb)->frag_list);
   1673 
   1674 	/* move skb->data to ip header from ext header */
   1675 	if (skb->data < skb_network_header(skb))
   1676 		__skb_pull(skb, skb_network_offset(skb));
   1677 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
   1678 		__skb_pull(tmp_skb, skb_network_header_len(skb));
   1679 		*tail_skb = tmp_skb;
   1680 		tail_skb = &(tmp_skb->next);
   1681 		skb->len += tmp_skb->len;
   1682 		skb->data_len += tmp_skb->len;
   1683 		skb->truesize += tmp_skb->truesize;
   1684 		tmp_skb->destructor = NULL;
   1685 		tmp_skb->sk = NULL;
   1686 	}
   1687 
   1688 	/* Allow local fragmentation. */
   1689 	skb->ignore_df = ip6_sk_ignore_df(sk);
   1690 
   1691 	*final_dst = fl6->daddr;
   1692 	__skb_pull(skb, skb_network_header_len(skb));
   1693 	if (opt && opt->opt_flen)
   1694 		ipv6_push_frag_opts(skb, opt, &proto);
   1695 	if (opt && opt->opt_nflen)
   1696 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
   1697 
   1698 	skb_push(skb, sizeof(struct ipv6hdr));
   1699 	skb_reset_network_header(skb);
   1700 	hdr = ipv6_hdr(skb);
   1701 
   1702 	ip6_flow_hdr(hdr, v6_cork->tclass,
   1703 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
   1704 					ip6_autoflowlabel(net, np), fl6));
   1705 	hdr->hop_limit = v6_cork->hop_limit;
   1706 	hdr->nexthdr = proto;
   1707 	hdr->saddr = fl6->saddr;
   1708 	hdr->daddr = *final_dst;
   1709 
   1710 	skb->priority = sk->sk_priority;
   1711 	skb->mark = sk->sk_mark;
   1712 
   1713 	skb->tstamp = cork->base.transmit_time;
   1714 
   1715 	skb_dst_set(skb, dst_clone(&rt->dst));
   1716 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
   1717 	if (proto == IPPROTO_ICMPV6) {
   1718 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
   1719 
   1720 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
   1721 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
   1722 	}
   1723 
   1724 	ip6_cork_release(cork, v6_cork);
   1725 out:
   1726 	return skb;
   1727 }
   1728 
   1729 int ip6_send_skb(struct sk_buff *skb)
   1730 {
   1731 	struct net *net = sock_net(skb->sk);
   1732 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
   1733 	int err;
   1734 
   1735 	err = ip6_local_out(net, skb->sk, skb);
   1736 	if (err) {
   1737 		if (err > 0)
   1738 			err = net_xmit_errno(err);
   1739 		if (err)
   1740 			IP6_INC_STATS(net, rt->rt6i_idev,
   1741 				      IPSTATS_MIB_OUTDISCARDS);
   1742 	}
   1743 
   1744 	return err;
   1745 }
   1746 
   /*
    * Build the pending datagram of @sk with ip6_finish_skb() and send it.
    * Returns 0 when there was nothing to send, otherwise the result of
    * ip6_send_skb().
    */
   int ip6_push_pending_frames(struct sock *sk)
   {
   	struct sk_buff *skb = ip6_finish_skb(sk);

   	return skb ? ip6_send_skb(skb) : 0;
   }
   EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
   1758 
   1759 static void __ip6_flush_pending_frames(struct sock *sk,
   1760 				       struct sk_buff_head *queue,
   1761 				       struct inet_cork_full *cork,
   1762 				       struct inet6_cork *v6_cork)
   1763 {
   1764 	struct sk_buff *skb;
   1765 
   1766 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
   1767 		if (skb_dst(skb))
   1768 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
   1769 				      IPSTATS_MIB_OUTDISCARDS);
   1770 		kfree_skb(skb);
   1771 	}
   1772 
   1773 	ip6_cork_release(cork, v6_cork);
   1774 }
   1775 
   1776 void ip6_flush_pending_frames(struct sock *sk)
   1777 {
   1778 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
   1779 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
   1780 }
   1781 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
   1782 
   1783 struct sk_buff *ip6_make_skb(struct sock *sk,
   1784 			     int getfrag(void *from, char *to, int offset,
   1785 					 int len, int odd, struct sk_buff *skb),
   1786 			     void *from, int length, int transhdrlen,
   1787 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
   1788 			     struct rt6_info *rt, unsigned int flags,
   1789 			     struct inet_cork_full *cork)
   1790 {
   1791 	struct inet6_cork v6_cork;
   1792 	struct sk_buff_head queue;
   1793 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
   1794 	int err;
   1795 
   1796 	if (flags & MSG_PROBE)
   1797 		return NULL;
   1798 
   1799 	__skb_queue_head_init(&queue);
   1800 
   1801 	cork->base.flags = 0;
   1802 	cork->base.addr = 0;
   1803 	cork->base.opt = NULL;
   1804 	cork->base.dst = NULL;
   1805 	v6_cork.opt = NULL;
   1806 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
   1807 	if (err) {
   1808 		ip6_cork_release(cork, &v6_cork);
   1809 		return ERR_PTR(err);
   1810 	}
   1811 	if (ipc6->dontfrag < 0)
   1812 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
   1813 
   1814 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
   1815 				&current->task_frag, getfrag, from,
   1816 				length + exthdrlen, transhdrlen + exthdrlen,
   1817 				flags, ipc6);
   1818 	if (err) {
   1819 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
   1820 		return ERR_PTR(err);
   1821 	}
   1822 
   1823 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
   1824 }